pax_global_header00006660000000000000000000000064145442331310014512gustar00rootroot0000000000000052 comment=24da95d1fb509d3d5ca41dfcf5b1bd271337abf6 golang-github-davidmytton-url-verifier-1.0.0/000077500000000000000000000000001454423313100212105ustar00rootroot00000000000000golang-github-davidmytton-url-verifier-1.0.0/.github/000077500000000000000000000000001454423313100225505ustar00rootroot00000000000000golang-github-davidmytton-url-verifier-1.0.0/.github/dependabot.yml000066400000000000000000000003011454423313100253720ustar00rootroot00000000000000version: 2 updates: - package-ecosystem: gomod directory: / schedule: interval: daily - package-ecosystem: github-actions directory: / schedule: interval: daily golang-github-davidmytton-url-verifier-1.0.0/.github/workflows/000077500000000000000000000000001454423313100246055ustar00rootroot00000000000000golang-github-davidmytton-url-verifier-1.0.0/.github/workflows/go.yml000066400000000000000000000011551454423313100257370ustar00rootroot00000000000000# This workflow will build a golang project # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-go name: Go CI on: [push, pull_request] jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Set up Go uses: actions/setup-go@v3 with: go-version: 1.19 - name: Build run: go build -v ./... - name: Test run: go test -v ./... -race -coverprofile=coverage.txt -covermode=atomic - name: Upload coverage to Codecov uses: codecov/codecov-action@v3 golang-github-davidmytton-url-verifier-1.0.0/.gitignore000066400000000000000000000004661454423313100232060ustar00rootroot00000000000000# Binaries for programs and plugins *.exe *.exe~ *.dll *.so *.dylib # Test binary, built with `go test -c` *.test # Output of the go coverage tool, specifically when used with LiteIDE *.out coverage.txt # Dependency directories (remove the comment below to include it) # vendor/ # Go workspace file go.workgolang-github-davidmytton-url-verifier-1.0.0/.trunk/000077500000000000000000000000001454423313100224315ustar00rootroot00000000000000golang-github-davidmytton-url-verifier-1.0.0/.trunk/.gitignore000066400000000000000000000001051454423313100244150ustar00rootroot00000000000000*out *logs *actions *notifications plugins user_trunk.yaml user.yaml golang-github-davidmytton-url-verifier-1.0.0/.trunk/configs/000077500000000000000000000000001454423313100240615ustar00rootroot00000000000000golang-github-davidmytton-url-verifier-1.0.0/.trunk/configs/.markdownlint.yaml000066400000000000000000000003311454423313100275310ustar00rootroot00000000000000# Autoformatter friendly markdownlint config (all formatting rules disabled) default: true blank_lines: false bullet: false html: false indentation: false line_length: false spaces: false url: false whitespace: false golang-github-davidmytton-url-verifier-1.0.0/.trunk/trunk.yaml000066400000000000000000000007721454423313100244660ustar00rootroot00000000000000version: 0.1 cli: version: 1.3.1 plugins: sources: - id: trunk ref: v0.0.8 uri: https://github.com/trunk-io/plugins lint: enabled: - actionlint@1.6.22 - golangci-lint@1.50.1 - gitleaks@8.15.2 - gofmt@1.19.3 - markdownlint@0.33.0 - git-diff-check - prettier@2.8.2 runtimes: enabled: - go@1.18.3 - node@18.12.1 actions: disabled: - trunk-check-pre-push - trunk-fmt-pre-commit enabled: - trunk-announce - trunk-upgrade-available golang-github-davidmytton-url-verifier-1.0.0/CHANGELOG.md000066400000000000000000000011041454423313100230150ustar00rootroot00000000000000# Change log ## 1.0.0 (2023-01-13) - First stable release. No changes from 0.2.1. ## 0.2.1 (2023-01-06) - Fix panic on invalid URL. ## 0.2.0 (2023-01-06) - Limit HTTP reachability checks to only execute against hosts with HTTP or HTTPS schemas. - Check the IPs the host resolves to and prevent executing reachability checks againsts internal IPs. This provides a layer of protection agains SSRF attacks, but can be disabled with `verifier.AllowHTTPCheckInternal()`. ## v0.1.1 (2023-01-06) - Fixed module path declaration. ## v0.1.0 (2023-01-06) - Initial version. golang-github-davidmytton-url-verifier-1.0.0/LICENSE000066400000000000000000000020551454423313100222170ustar00rootroot00000000000000MIT License Copyright (c) 2023 David Mytton Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. golang-github-davidmytton-url-verifier-1.0.0/README.md000066400000000000000000000110231454423313100224640ustar00rootroot00000000000000# url-verifier 🔗 A Go library for URL validation and verification: does this URL actually work? [![Build Status](https://github.com/davidmytton/url-verifier/actions/workflows/go.yml/badge.svg)](https://github.com/davidmytton/url-verifier/actions) [![codecov](https://codecov.io/gh/davidmytton/url-verifier/branch/main/graph/badge.svg?token=HXSXEHU79J)](https://codecov.io/gh/davidmytton/url-verifier) ## Features - **URL Validation:** validates whether a string is a valid URL. - **Different Validation Types:** validates whether the URL is valid according to a "human" definition of a correct URL, strict compliance with [RFC3986](https://www.rfc-editor.org/rfc/rfc3986) (Uniform Resource Identifier (URI): Generic Syntax), and/or compliance with RFC3986 with the addition of a schema e.g. HTTPS. - **Reachability:** verifies whether the URL is actually reachable via an HTTP GET request and provides the status code returned. ## Rationale There are several methods of validating URLs in Go depending on what you're trying to achieve. Strict, technical validation can be done through a simple call to [`url.Parse`](https://pkg.go.dev/net/url#Parse) in Go's Standard library or a more "human" definition of a valid URL using [govalidator](https://github.com/asaskevich/govalidator) (which is what this library uses internally for syntax verification). However, this will successfully validate all types of URLs, from relative paths through to hostnames without a scheme. Often, when building user-facing applications, what we actually want is a way to check whether the URL input provided will actually work i.e. it's valid, it resolves, and it can be loaded in a web browser. ## Install Use `go get` to install this package. ```shell go get -u github.com/davidmytton/url-verifier ``` ## Usage ### Basic usage Use `Verify` to check whether a URL is correct: ```go package main import ( "fmt" urlverifier "github.com/davidmytton/url-verifier" ) func main() { url := "https://example.com/" verifier := urlverifier.NewVerifier() ret, err := verifier.Verify(url) if err != nil { fmt.Errorf("Error: %s", err) } fmt.Printf("Result: %+v\n", ret) /* Result: &{ URL:https://example.com/ URLComponents:https://example.com/ IsURL:true IsRFC3986URL:true IsRFC3986URI:true HTTP: } */ } ``` ### URL reachability check Call `EnableHTTPCheck()` to issue a `GET` request to the HTTP or HTTPS URL and check whether it is reachable and successfully returns a response (a success (2xx) or success-like code (3xx)). Non-HTTP(S) URLs will return an error. ```go package main import ( "fmt" urlverifier "github.com/davidmytton/url-verifier" ) func main() { url := "https://example.com/" verifier := urlverifier.NewVerifier() verifier.EnableHTTPCheck() ret, err := verifier.Verify(url) if err != nil { fmt.Errorf("Error: %s", err) } fmt.Printf("Result: %+v\n", ret) fmt.Printf("HTTP: %+v\n", ret.HTTP) if ret.HTTP.IsSuccess { fmt.Println("The URL is reachable with status code", ret.HTTP.StatusCode) } /* Result: &{ URL:https://example.com/ URLComponents:https://example.com/ IsURL:true IsRFC3986URL:true IsRFC3986URI:true HTTP:0x140000b6a50 } HTTP: &{ Reachable:true StatusCode:200 IsSuccess:true } The URL is reachable with status code 200 */ } ``` ## HTTP checks against internal URLs By default, the reachability checks are only executed if the host resolves to a non-internal IP address. An internal IP address is defined as any of: [private](https://pkg.go.dev/net#IP.IsPrivate), [loopback](https://pkg.go.dev/net#IP.IsLoopback), [link-local unicast](https://pkg.go.dev/net#IP.IsLinkLocalUnicast), [link-local multicast](https://pkg.go.dev/net#IP.IsLinkLocalMulticast), [interface-local multicast](https://pkg.go.dev/net#IP.IsInterfaceLocalMulticast), or [unspecified](https://pkg.go.dev/net#IP.IsUnspecified). This is one layer of protection against [Server Side Request Forgery](https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html#application-layer_1) (SSRF) requests. To allow internal HTTP checks, call `verifier.AllowHTTPCheckInternal()`: ```go urlToCheck := "http://localhost:3000" verifier := NewVerifier() verifier.EnableHTTPCheck() // Danger: Makes SSRF easier! verifier.AllowHTTPCheckInternal() ret, err := verifier.Verify(urlToCheck) ... ``` ## Credits This library is heavily inspired by [`email-verifier`](https://github.com/AfterShip/email-verifier). ## License This package is licensed under the MIT License. golang-github-davidmytton-url-verifier-1.0.0/go.mod000066400000000000000000000004761454423313100223250ustar00rootroot00000000000000module github.com/davidmytton/url-verifier go 1.19 require ( github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d github.com/stretchr/testify v1.8.1 ) require ( github.com/davecgh/go-spew v1.1.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) golang-github-davidmytton-url-verifier-1.0.0/go.sum000066400000000000000000000033371454423313100223510ustar00rootroot00000000000000github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d h1:Byv0BzEl3/e6D5CLfI0j/7hiIEtvGVFPCZ7Ei2oq8iQ= github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= golang-github-davidmytton-url-verifier-1.0.0/http_check.go000066400000000000000000000020221454423313100236470ustar00rootroot00000000000000// SPDX-License-Identifier: MIT package urlverifier import "net/http" // HTTP is the result of a HTTP check type HTTP struct { Reachable bool `json:"reachable"` // Whether the URL is reachable via HTTP. This may be true even if the response is an HTTP error e.g. a 500 error. StatusCode int `json:"status_code"` // The HTTP status code IsSuccess bool `josn:"is_success"` // Whether the HTTP response is a success (2xx) or success-like code (3xx) } // CheckHTTP checks if the URL is reachable via HTTP func (v *Verifier) CheckHTTP(urlToCheck string) (*HTTP, error) { ret := HTTP{ Reachable: false, IsSuccess: false, } // Check if the URL is reachable via HTTP resp, err := http.Get(urlToCheck) if err != nil { return &ret, err } defer resp.Body.Close() ret.Reachable = true ret.StatusCode = resp.StatusCode // Check if the HTTP response is a success (2xx) or success-like code (3xx) if resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusBadRequest { ret.IsSuccess = true } return &ret, nil } golang-github-davidmytton-url-verifier-1.0.0/http_check_test.go000066400000000000000000000021061454423313100247110ustar00rootroot00000000000000// SPDX-License-Identifier: MIT package urlverifier import ( "net/url" "testing" "github.com/stretchr/testify/assert" ) func TestCheckHTTP_Status200(t *testing.T) { urlToCheck := "http://example.com/" verifier := NewVerifier() ret, err := verifier.CheckHTTP(urlToCheck) expected := &HTTP{ Reachable: true, StatusCode: 200, IsSuccess: true, } assert.Equal(t, expected, ret) assert.Nil(t, err) } func TestCheckHTTP_Status404(t *testing.T) { urlToCheck := "http://example.com/notfound" verifier := NewVerifier() ret, err := verifier.CheckHTTP(urlToCheck) expected := &HTTP{ Reachable: true, StatusCode: 404, IsSuccess: false, } assert.Equal(t, expected, ret) assert.Nil(t, err) } func TestCheckHTTP_Unreachable(t *testing.T) { urlToCheck := "http://example.unreachable" verifier := NewVerifier() ret, err := verifier.CheckHTTP(urlToCheck) expected := &HTTP{ Reachable: false, IsSuccess: false, } assert.Equal(t, expected, ret) assert.IsType(t, &url.Error{}, err) assert.ErrorContains(t, err, "lookup example.unreachable: no such host") } golang-github-davidmytton-url-verifier-1.0.0/verifier.go000066400000000000000000000112771454423313100233620ustar00rootroot00000000000000// Package urlverifier is a Go library for URL validation and verification: does // this URL actually work? // SPDX-License-Identifier: MIT package urlverifier import ( "errors" "fmt" "net" "net/url" "github.com/asaskevich/govalidator" ) // Verifier is a URL Verifier. Create one using NewVerifier() type Verifier struct { httpCheckEnabled bool // Whether to check if the URL is reachable via HTTP (default: false) allowHttpCheckInternal bool // Whether to allow HTTP checks to hosts that resolve to internal IPs (default: false) } // Result is the result of a URL verification type Result struct { URL string `json:"url"` // The URL that was checked URLComponents *url.URL `json:"url_components"` // The URL components, if the URL is valid IsURL bool `json:"is_url"` // Whether the URL is valid IsRFC3986URL bool `json:"is_rfc3986_url"` // Whether the URL is a valid URL according to RFC 3986. This is the same as IsRFC3986URI but with a check for a scheme. IsRFC3986URI bool `json:"is_rfc3986_uri"` // Whether the URL is a valid URI according to RFC 3986 HTTP *HTTP `json:"http"` // The result of a HTTP check, if enabled } // NewVerifier creates a new URL Verifier func NewVerifier() *Verifier { return &Verifier{allowHttpCheckInternal: false} } // Verify verifies a URL. It checks if the URL is valid, parses it if so, and // checks if it is valid according to RFC 3986 (as a URI without a scheme and a // URL with a scheme). If the HTTP check is enabled, it also checks if the URL // is reachable via HTTP. func (v *Verifier) Verify(rawURL string) (*Result, error) { ret := Result{ URL: rawURL, IsURL: false, IsRFC3986URL: false, IsRFC3986URI: false, } // Check if the URL is valid ret.IsURL = govalidator.IsURL(ret.URL) // If the URL is valid, parse it if ret.IsURL { p, err := url.Parse(ret.URL) if err != nil { return &ret, err } ret.URLComponents = p } // Check if the URL is a valid URI according to RFC 3986, plus a check for a // scheme. ret.IsRFC3986URL = v.IsRequestURL(ret.URL) // Check if the URL is a valid URI according to RFC 3986 ret.IsRFC3986URI = v.IsRequestURI(ret.URL) // Check if the URL is reachable via HTTP if v.httpCheckEnabled { if ret.URLComponents != nil && (ret.URLComponents.Scheme == "http" || ret.URLComponents.Scheme == "https") { if !v.allowHttpCheckInternal { // Lookup host IP host := ret.URLComponents.Hostname() ips, err := net.LookupIP(host) if err != nil { return &ret, err } // Check each IP to see if it is an internal IP for _, ip := range ips { if ip.IsPrivate() || ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() || ip.IsInterfaceLocalMulticast() || ip.IsUnspecified() { message := fmt.Sprintf("unable to check if the URL is reachable via HTTP: the URL %s resolves to an internal IP %s", host, ip) return &ret, errors.New(message) } } } http, err := v.CheckHTTP(ret.URL) if err != nil { ret.HTTP = http return &ret, err } ret.HTTP = http } else { return &ret, errors.New("unable to check if the URL is reachable via HTTP: the URL does not have a HTTP or HTTPS scheme") } } return &ret, nil } // IsRequestURL checks if the string rawURL, assuming it was received in an HTTP // request, is a valid URL confirm to RFC 3986. Implemented from govalidator: // https://github.com/asaskevich/govalidator/blob/f21760c49a8d602d863493de796926d2a5c1138d/validator.go#L130 func (v *Verifier) IsRequestURL(rawURL string) bool { url, err := url.ParseRequestURI(rawURL) if err != nil { return false // Couldn't even parse the rawURL } if len(url.Scheme) == 0 { return false // No Scheme found } return true } // IsRequestURI checks if the string rawURL, assuming it was received in an HTTP // request, is an absolute URI or an absolute path. Implemented from // govalidator: // https://github.com/asaskevich/govalidator/blob/f21760c49a8d602d863493de796926d2a5c1138d/validator.go#L144 func (v *Verifier) IsRequestURI(rawURL string) bool { _, err := url.ParseRequestURI(rawURL) return err == nil } // DisableHTTPCheck disables checking if the URL is reachable via HTTP func (v *Verifier) DisableHTTPCheck() { v.httpCheckEnabled = false } // EnableHTTPCheck enables checking if the URL is reachable via HTTP func (v *Verifier) EnableHTTPCheck() { v.httpCheckEnabled = true } // AllowHTTPCheckInternal allows checking internal URLs func (v *Verifier) AllowHTTPCheckInternal() { v.allowHttpCheckInternal = true } // DisallowHTTPCheckInternal disallows checking internal URLs func (v *Verifier) DisallowHTTPCheckInternal() { v.allowHttpCheckInternal = false } golang-github-davidmytton-url-verifier-1.0.0/verifier_test.go000066400000000000000000000303751454423313100244210ustar00rootroot00000000000000// SPDX-License-Identifier: MIT package urlverifier import ( "fmt" "net" "net/http" "net/http/httptest" "net/url" "testing" "github.com/stretchr/testify/assert" ) var testURLs = []struct { rawURL string urlComponents *url.URL isURL bool isRFC3986URL bool isRFC3986URI bool }{ {rawURL: "http://example.com", urlComponents: &url.URL{Scheme: "http", Host: "example.com"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "https://example.com", urlComponents: &url.URL{Scheme: "https", Host: "example.com"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://example.com/", urlComponents: &url.URL{Scheme: "http", Host: "example.com", Path: "/"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://example.com/path", urlComponents: &url.URL{Scheme: "http", Host: "example.com", Path: "/path"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://example.com/path?query", urlComponents: &url.URL{Scheme: "http", Host: "example.com", Path: "/path", RawQuery: "query"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://example.com/path?query#fragment", urlComponents: &url.URL{Scheme: "http", Host: "example.com", Path: "/path", RawQuery: "query", Fragment: "fragment"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://user:pass@www.example.com/", urlComponents: &url.URL{Scheme: "http", Host: "www.example.com", Path: "/", User: url.UserPassword("user", "pass")}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "", urlComponents: nil, isURL: false, isRFC3986URL: false, isRFC3986URI: false}, {rawURL: "example.com", urlComponents: &url.URL{Scheme: "", Host: "", Path: "example.com"}, isURL: true, isRFC3986URL: false, isRFC3986URI: false}, {rawURL: "http://example.dev/", urlComponents: &url.URL{Scheme: "http", Host: "example.dev", Path: "/"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://example.中文网/", urlComponents: &url.URL{Scheme: "http", Host: "example.中文网", Path: "/"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://example.com:8080", urlComponents: &url.URL{Scheme: "http", Host: "example.com:8080"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "ftp://example.com", urlComponents: &url.URL{Scheme: "ftp", Host: "example.com"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "ftp.example.com", urlComponents: &url.URL{Scheme: "", Host: "", Path: "ftp.example.com"}, isURL: true, isRFC3986URL: false, isRFC3986URI: false}, {rawURL: "http://127.0.0.1/", urlComponents: &url.URL{Scheme: "http", Host: "127.0.0.1", Path: "/"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://example.com/?query=%2F", urlComponents: &url.URL{Scheme: "http", Host: "example.com", Path: "/", RawQuery: "query=%2F"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://localhost:3000/", urlComponents: &url.URL{Scheme: "http", Host: "localhost:3000", Path: "/"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://example.com/?query", urlComponents: &url.URL{Scheme: "http", Host: "example.com", Path: "/", RawQuery: "query"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://example.com?query", urlComponents: &url.URL{Scheme: "http", Host: "example.com", Path: "", RawQuery: "query"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://www.xn--froschgrn-x9a.net/", urlComponents: &url.URL{Scheme: "http", Host: "www.xn--froschgrn-x9a.net", Path: "/"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://example.com/a-", urlComponents: &url.URL{Scheme: "http", Host: "example.com", Path: "/a-"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://example.پاکستان/", urlComponents: &url.URL{Scheme: "http", Host: "example.پاکستان", Path: "/"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://example.c_o_m/", urlComponents: nil, isURL: false, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://_example.com/", urlComponents: nil, isURL: false, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://example_example.com/", urlComponents: &url.URL{Scheme: "http", Host: "example_example.com", Path: "/"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "xyz://example.com", urlComponents: nil, isURL: false, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: ".com", urlComponents: nil, isURL: false, isRFC3986URL: false, isRFC3986URI: false}, {rawURL: "invalid.", urlComponents: &url.URL{Scheme: "", Host: "", Path: "invalid."}, isURL: true, isRFC3986URL: false, isRFC3986URI: false}, {rawURL: "http://example.com/~user", urlComponents: &url.URL{Scheme: "http", Host: "example.com", Path: "/~user"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "mailto:someone@example.com", urlComponents: &url.URL{Scheme: "mailto", Host: "", Opaque: "someone@example.com"}, isURL: true, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "/abs/test/dir", urlComponents: nil, isURL: false, isRFC3986URL: false, isRFC3986URI: true}, {rawURL: "./rel/test/dir", urlComponents: nil, isURL: false, isRFC3986URL: false, isRFC3986URI: false}, {rawURL: "http://example-.com/", urlComponents: nil, isURL: false, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://-example.com/", urlComponents: nil, isURL: false, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://example_.com/", urlComponents: nil, isURL: false, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://_example.com/", urlComponents: nil, isURL: false, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://example.com:80:80/", urlComponents: nil, isURL: false, isRFC3986URL: true, isRFC3986URI: true}, {rawURL: "http://example.com://8080", urlComponents: nil, isURL: false, isRFC3986URL: true, isRFC3986URI: true}, } func TestCheckVerify_HTTPCheckDisabledDefault(t *testing.T) { for _, test := range testURLs { urlToCheck := test.rawURL verifier := NewVerifier() //verifier.DisableHTTPCheck() ret, err := verifier.Verify(urlToCheck) expected := Result{ URL: urlToCheck, URLComponents: test.urlComponents, IsURL: test.isURL, IsRFC3986URL: test.isRFC3986URL, IsRFC3986URI: test.isRFC3986URI, HTTP: nil, } assert.Equal(t, expected, *ret) assert.Nil(t, err) } } func TestCheckVerify_HTTPCheckDisabledExplicit(t *testing.T) { for _, test := range testURLs { urlToCheck := test.rawURL verifier := NewVerifier() verifier.DisableHTTPCheck() ret, err := verifier.Verify(urlToCheck) expected := Result{ URL: urlToCheck, URLComponents: test.urlComponents, IsURL: test.isURL, IsRFC3986URL: test.isRFC3986URL, IsRFC3986URI: test.isRFC3986URI, HTTP: nil, } assert.Equal(t, expected, *ret) assert.Nil(t, err) } } func TestCheckVerify_HTTPCheckEnabledValid(t *testing.T) { urlToCheck := "https://example.com/" verifier := NewVerifier() verifier.EnableHTTPCheck() ret, err := verifier.Verify(urlToCheck) expected := Result{ URL: urlToCheck, URLComponents: &url.URL{Scheme: "https", Host: "example.com", Path: "/"}, IsURL: true, IsRFC3986URL: true, IsRFC3986URI: true, HTTP: &HTTP{ Reachable: true, StatusCode: 200, IsSuccess: true, }, } assert.Equal(t, expected, *ret) assert.Nil(t, err) } func TestCheckVerify_HTTPCheckEnabledInvalid(t *testing.T) { urlToCheck := "invalid" verifier := NewVerifier() verifier.EnableHTTPCheck() ret, err := verifier.Verify(urlToCheck) expected := Result{ URL: urlToCheck, URLComponents: nil, IsURL: false, IsRFC3986URL: false, IsRFC3986URI: false, HTTP: nil, } assert.Equal(t, expected, *ret) assert.Error(t, err) assert.ErrorContains(t, err, "unable to check if the URL is reachable via HTTP: the URL does not have a HTTP or HTTPS scheme") } func TestCheckVerify_HTTPCheckEnabledValidUnreachable(t *testing.T) { urlToCheck := "https://example.unreachable/" verifier := NewVerifier() verifier.EnableHTTPCheck() ret, err := verifier.Verify(urlToCheck) expected := Result{ URL: urlToCheck, URLComponents: &url.URL{Scheme: "https", Host: "example.unreachable", Path: "/"}, IsURL: true, IsRFC3986URL: true, IsRFC3986URI: true, HTTP: nil, } assert.Equal(t, expected, *ret) assert.IsType(t, &net.DNSError{}, err) assert.ErrorContains(t, err, "lookup example.unreachable: no such host") } func TestCheckVerify_HTTPCheckEnabledValidLocalDisallowedDefault(t *testing.T) { urlToCheck := "https://localhost/" verifier := NewVerifier() verifier.EnableHTTPCheck() //verifier.DisallowHTTPCheckInternal() ret, err := verifier.Verify(urlToCheck) expected := Result{ URL: urlToCheck, URLComponents: &url.URL{Scheme: "https", Host: "localhost", Path: "/"}, IsURL: true, IsRFC3986URL: true, IsRFC3986URI: true, HTTP: nil, } assert.Equal(t, expected, *ret) assert.Error(t, err) assert.ErrorContains(t, err, "unable to check if the URL is reachable via HTTP: the URL localhost resolves to an internal IP") } func TestCheckVerify_HTTPCheckEnabledValidLocalDisallowedExplicit(t *testing.T) { urlToCheck := "https://localhost/" verifier := NewVerifier() verifier.EnableHTTPCheck() verifier.DisallowHTTPCheckInternal() ret, err := verifier.Verify(urlToCheck) expected := Result{ URL: urlToCheck, URLComponents: &url.URL{Scheme: "https", Host: "localhost", Path: "/"}, IsURL: true, IsRFC3986URL: true, IsRFC3986URI: true, HTTP: nil, } assert.Equal(t, expected, *ret) assert.Error(t, err) assert.ErrorContains(t, err, "unable to check if the URL is reachable via HTTP: the URL localhost resolves to an internal IP") } func TestCheckVerify_HTTPCheckEnabledValidLocalAllowed(t *testing.T) { // Start a local test server ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { fmt.Fprintln(w, "Hello, client") })) defer ts.Close() // Parse the test server URL tsURL, err := url.Parse(ts.URL) if err != nil { t.Fatal(err) } urlToCheck := ts.URL verifier := NewVerifier() verifier.EnableHTTPCheck() verifier.AllowHTTPCheckInternal() ret, err := verifier.Verify(urlToCheck) expected := Result{ URL: urlToCheck, URLComponents: &url.URL{Scheme: "http", Host: tsURL.Host, Path: ""}, IsURL: true, IsRFC3986URL: true, IsRFC3986URI: true, HTTP: &HTTP{ Reachable: true, StatusCode: 200, IsSuccess: true, }, } assert.Equal(t, expected, *ret) assert.Nil(t, err) } func TestCheckVerify_HTTPCheckEnabledInvalidScheme(t *testing.T) { urlToCheck := "example.com" verifier := NewVerifier() verifier.EnableHTTPCheck() ret, err := verifier.Verify(urlToCheck) expected := Result{ URL: urlToCheck, URLComponents: &url.URL{Scheme: "", Host: "", Path: "example.com"}, IsURL: true, IsRFC3986URL: false, IsRFC3986URI: false, HTTP: nil, } assert.Equal(t, expected, *ret) assert.Error(t, err) assert.ErrorContains(t, err, "unable to check if the URL is reachable via HTTP: the URL does not have a HTTP or HTTPS scheme") } func TestIsRequestURL(t *testing.T) { for _, test := range testURLs { urlToCheck := test.rawURL verifier := NewVerifier() ret := verifier.IsRequestURL(urlToCheck) assert.Equal(t, test.isRFC3986URL, ret) } } func TestIsRequestURI(t *testing.T) { for _, test := range testURLs { urlToCheck := test.rawURL verifier := NewVerifier() ret := verifier.IsRequestURI(urlToCheck) assert.Equal(t, test.isRFC3986URI, ret) } }