pax_global_header00006660000000000000000000000064136546006660014526gustar00rootroot0000000000000052 comment=a262ddef4035fbcef4ba6dcc6fae6dd28cad4243 go-oniguruma-1.2.1/000077500000000000000000000000001365460066600141405ustar00rootroot00000000000000go-oniguruma-1.2.1/.github/000077500000000000000000000000001365460066600155005ustar00rootroot00000000000000go-oniguruma-1.2.1/.github/workflows/000077500000000000000000000000001365460066600175355ustar00rootroot00000000000000go-oniguruma-1.2.1/.github/workflows/test.yml000066400000000000000000000014711365460066600212420ustar00rootroot00000000000000on: [push, pull_request] name: Test jobs: test: strategy: matrix: go-version: [1.13.x, 1.14.x] runs-on: ubuntu-latest env: ONIGURUMA_VERSION: 6.9.4 steps: - name: Install libonig5 run: | wget "http://archive.ubuntu.com/ubuntu/pool/universe/libo/libonig/libonig5_${ONIGURUMA_VERSION}-1_amd64.deb" sudo dpkg -i "libonig5_${ONIGURUMA_VERSION}-1_amd64.deb" wget "http://archive.ubuntu.com/ubuntu/pool/universe/libo/libonig/libonig-dev_${ONIGURUMA_VERSION}-1_amd64.deb" sudo dpkg -i "libonig-dev_${ONIGURUMA_VERSION}-1_amd64.deb" - name: Install Go uses: actions/setup-go@v1 with: go-version: ${{ matrix.go-version }} - name: Checkout code uses: actions/checkout@v2 - name: Test run: go test -v ./... go-oniguruma-1.2.1/LICENSE000066400000000000000000000020431365460066600151440ustar00rootroot00000000000000Copyright (C) 2011 by Zhigang Chen Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. go-oniguruma-1.2.1/README.md000066400000000000000000000020651365460066600154220ustar00rootroot00000000000000# go-oniguruma ![Test](https://github.com/go-enry/go-oniguruma/workflows/Test/badge.svg) This repository is a fork of [moovweb/rubex](https://github.com/moovweb/rubex/tree/go1) - a simple regular expression library (based on [oniguruma](https://github.com/kkos/oniguruma)) that supports Ruby's regex syntax. The _rubex_ was originally created by Zhigang Chen (zhigang.chen@moovweb.com or zhigangc@gmail.com). It implements all the public functions of Go's Regexp package, except LiteralPrefix. By the benchmark tests in regexp, the library is 40% to 10X faster than Regexp on all but one test. Unlike Go's regexp, this library supports named capture groups and also allow `"\\1"` and `"\\k"` in replacement strings. The library calls the _oniguruma_ regex library for regex pattern searching. All replacement code is done in Go. Install ------- ```sh # linux (debian/ubuntu/...) 
sudo apt-get install libonig-dev # osx (homebrew) brew install oniguruma go get github.com/go-enry/go-oniguruma ``` License ------- Apache License Version 2.0, see [LICENSE](LICENSE) go-oniguruma-1.2.1/chelper.c000066400000000000000000000134651365460066600157370ustar00rootroot00000000000000#include #include #include #ifdef BENCHMARK_CHELP #include #endif #include "chelper.h" int NewOnigRegex( char *pattern, int pattern_length, int option, OnigRegex *regex, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer) { int ret = ONIG_NORMAL; int error_msg_len = 0; OnigUChar *pattern_start = (OnigUChar *) pattern; OnigUChar *pattern_end = (OnigUChar *) (pattern + pattern_length); *error_info = (OnigErrorInfo *) malloc(sizeof(OnigErrorInfo)); memset(*error_info, 0, sizeof(OnigErrorInfo)); onig_initialize_encoding(*encoding); *error_buffer = (char*) malloc(ONIG_MAX_ERROR_MESSAGE_LEN * sizeof(char)); memset(*error_buffer, 0, ONIG_MAX_ERROR_MESSAGE_LEN * sizeof(char)); ret = onig_new(regex, pattern_start, pattern_end, (OnigOptionType)(option), *encoding, OnigDefaultSyntax, *error_info); if (ret != ONIG_NORMAL) { error_msg_len = onig_error_code_to_str((unsigned char*)(*error_buffer), ret, *error_info); if (error_msg_len >= ONIG_MAX_ERROR_MESSAGE_LEN) { error_msg_len = ONIG_MAX_ERROR_MESSAGE_LEN - 1; } (*error_buffer)[error_msg_len] = '\0'; } return ret; } int SearchOnigRegex( void *str, int str_length, int offset, int option, OnigRegex regex, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures) { int ret = ONIG_MISMATCH; int error_msg_len = 0; OnigRegion *region; #ifdef BENCHMARK_CHELP struct timeval tim1, tim2; long t; #endif OnigUChar *str_start = (OnigUChar *) str; OnigUChar *str_end = (OnigUChar *) (str_start + str_length); OnigUChar *search_start = (OnigUChar *)(str_start + offset); OnigUChar *search_end = str_end; #ifdef BENCHMARK_CHELP gettimeofday(&tim1, NULL); #endif region = onig_region_new(); ret = onig_search(regex, 
str_start, str_end, search_start, search_end, region, option); if (ret < 0 && error_buffer != NULL) { error_msg_len = onig_error_code_to_str((unsigned char*)(error_buffer), ret, error_info); if (error_msg_len >= ONIG_MAX_ERROR_MESSAGE_LEN) { error_msg_len = ONIG_MAX_ERROR_MESSAGE_LEN - 1; } error_buffer[error_msg_len] = '\0'; } else if (captures != NULL) { int i; int count = 0; for (i = 0; i < region->num_regs; i++) { captures[2*count] = region->beg[i]; captures[2*count+1] = region->end[i]; count ++; } *numCaptures = count; } onig_region_free(region, 1); #ifdef BENCHMARK_CHELP gettimeofday(&tim2, NULL); t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec; printf("%ld microseconds elapsed\n", t); #endif return ret; } int MatchOnigRegex(void *str, int str_length, int offset, int option, OnigRegex regex) { int ret = ONIG_MISMATCH; int error_msg_len = 0; OnigRegion *region; #ifdef BENCHMARK_CHELP struct timeval tim1, tim2; long t; #endif OnigUChar *str_start = (OnigUChar *) str; OnigUChar *str_end = (OnigUChar *) (str_start + str_length); OnigUChar *search_start = (OnigUChar *)(str_start + offset); #ifdef BENCHMARK_CHELP gettimeofday(&tim1, NULL); #endif region = onig_region_new(); ret = onig_match(regex, str_start, str_end, search_start, region, option); onig_region_free(region, 1); #ifdef BENCHMARK_CHELP gettimeofday(&tim2, NULL); t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec; printf("%ld microseconds elapsed\n", t); #endif return ret; } int LookupOnigCaptureByName(char *name, int name_length, OnigRegex regex) { int ret = ONIGERR_UNDEFINED_NAME_REFERENCE; OnigRegion *region; #ifdef BENCHMARK_CHELP struct timeval tim1, tim2; long t; #endif OnigUChar *name_start = (OnigUChar *) name; OnigUChar *name_end = (OnigUChar *) (name_start + name_length); #ifdef BENCHMARK_CHELP gettimeofday(&tim1, NULL); #endif region = onig_region_new(); ret = onig_name_to_backref_number(regex, name_start, name_end, region); 
onig_region_free(region, 1); #ifdef BENCHMARK_CHELP gettimeofday(&tim2, NULL); t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec; printf("%ld microseconds elapsed\n", t); #endif return ret; } typedef struct { char *nameBuffer; int bufferOffset; int bufferSize; int *numbers; int numIndex; } group_info_t; int name_callback(const UChar* name, const UChar* name_end, int ngroup_num, int* group_nums, regex_t* reg, void* arg) { int nameLen, offset, newOffset; group_info_t *groupInfo; groupInfo = (group_info_t*) arg; offset = groupInfo->bufferOffset; nameLen = name_end - name; newOffset = offset + nameLen; //if there are already names, add a ";" if (offset > 0) { newOffset += 1; } if (newOffset <= groupInfo->bufferSize) { if (offset > 0) { groupInfo->nameBuffer[offset] = ';'; offset += 1; } memcpy(&groupInfo->nameBuffer[offset], name, nameLen); } groupInfo->bufferOffset = newOffset; if (ngroup_num > 0) { groupInfo->numbers[groupInfo->numIndex] = group_nums[ngroup_num-1]; } else { groupInfo->numbers[groupInfo->numIndex] = -1; } groupInfo->numIndex += 1; return 0; /* 0: continue */ } int GetCaptureNames(OnigRegex reg, void *buffer, int bufferSize, int* groupNumbers) { int ret; group_info_t groupInfo; groupInfo.nameBuffer = (char*)buffer; groupInfo.bufferOffset = 0; groupInfo.bufferSize = bufferSize; groupInfo.numbers = groupNumbers; groupInfo.numIndex = 0; onig_foreach_name(reg, name_callback, (void* )&groupInfo); return groupInfo.bufferOffset; } go-oniguruma-1.2.1/chelper.h000066400000000000000000000013271365460066600157360ustar00rootroot00000000000000#include extern int NewOnigRegex( char *pattern, int pattern_length, int option, OnigRegex *regex, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer); extern int SearchOnigRegex( void *str, int str_length, int offset, int option, OnigRegex regex, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures); extern int MatchOnigRegex( void *str, int str_length, int 
offset, int option, OnigRegex regex); extern int LookupOnigCaptureByName(char *name, int name_length, OnigRegex regex); extern int GetCaptureNames(OnigRegex regex, void *buffer, int bufferSize, int* groupNumbers); go-oniguruma-1.2.1/constants.go000066400000000000000000000020601365460066600165010ustar00rootroot00000000000000package rubex const ( ONIG_OPTION_DEFAULT = ONIG_OPTION_NONE /* options */ ONIG_OPTION_NONE = 0 ONIG_OPTION_IGNORECASE = 1 ONIG_OPTION_EXTEND = (ONIG_OPTION_IGNORECASE << 1) ONIG_OPTION_MULTILINE = (ONIG_OPTION_EXTEND << 1) ONIG_OPTION_SINGLELINE = (ONIG_OPTION_MULTILINE << 1) ONIG_OPTION_FIND_LONGEST = (ONIG_OPTION_SINGLELINE << 1) ONIG_OPTION_FIND_NOT_EMPTY = (ONIG_OPTION_FIND_LONGEST << 1) ONIG_OPTION_NEGATE_SINGLELINE = (ONIG_OPTION_FIND_NOT_EMPTY << 1) ONIG_OPTION_DONT_CAPTURE_GROUP = (ONIG_OPTION_NEGATE_SINGLELINE << 1) ONIG_OPTION_CAPTURE_GROUP = (ONIG_OPTION_DONT_CAPTURE_GROUP << 1) /* options (search time) */ ONIG_OPTION_NOTBOL = (ONIG_OPTION_CAPTURE_GROUP << 1) ONIG_OPTION_NOTEOL = (ONIG_OPTION_NOTBOL << 1) ONIG_OPTION_POSIX_REGION = (ONIG_OPTION_NOTEOL << 1) ONIG_OPTION_MAXBIT = ONIG_OPTION_POSIX_REGION /* limit */ ONIG_NORMAL = 0 ONIG_MISMATCH = -1 ONIG_MISMATCH_STR = "mismatch" ONIGERR_UNDEFINED_NAME_REFERENCE = -217 ) go-oniguruma-1.2.1/go.mod000066400000000000000000000000601365460066600152420ustar00rootroot00000000000000module github.com/go-enry/go-oniguruma go 1.14 go-oniguruma-1.2.1/quotemeta.go000066400000000000000000000015711365460066600164770ustar00rootroot00000000000000// Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package regexp implements a simple regular expression library. // QuoteMeta func is copied here to avoid linking the entire Regexp library. 
package rubex func special(c int) bool { for _, r := range `\.+*?()|[]^$` { if c == int(r) { return true } } return false } // QuoteMeta returns a string that quotes all regular expression metacharacters // inside the argument text; the returned string is a regular expression matching // the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`. func QuoteMeta(s string) string { b := make([]byte, 2*len(s)) // A byte loop is correct because all metacharacters are ASCII. j := 0 for i := 0; i < len(s); i++ { if special(int(s[i])) { b[j] = '\\' j++ } b[j] = s[i] j++ } return string(b[0:j]) } go-oniguruma-1.2.1/regex.go000066400000000000000000000356631365460066600156160ustar00rootroot00000000000000package rubex /* #cgo CFLAGS: -I/usr/local/include #cgo LDFLAGS: -L/usr/local/lib -lonig #include #include #include "chelper.h" */ import "C" import ( "bytes" "errors" "fmt" "io" "runtime" "strconv" "sync" "unicode/utf8" "unsafe" ) const numMatchStartSize = 4 const numReadBufferStartSize = 256 var mutex sync.Mutex type NamedGroupInfo map[string]int type Regexp struct { pattern string regex C.OnigRegex encoding C.OnigEncoding errorInfo *C.OnigErrorInfo errorBuf *C.char numCaptures int32 namedGroupInfo NamedGroupInfo } // NewRegexp creates and initializes a new Regexp with the given pattern and option. func NewRegexp(pattern string, option int) (*Regexp, error) { return initRegexp(&Regexp{pattern: pattern, encoding: C.ONIG_ENCODING_UTF8}, option) } // NewRegexpASCII is equivalent to NewRegexp, but with the encoding restricted to ASCII. 
func NewRegexpASCII(pattern string, option int) (*Regexp, error) { return initRegexp(&Regexp{pattern: pattern, encoding: C.ONIG_ENCODING_ASCII}, option) } func initRegexp(re *Regexp, option int) (*Regexp, error) { patternCharPtr := C.CString(re.pattern) defer C.free(unsafe.Pointer(patternCharPtr)) mutex.Lock() defer mutex.Unlock() errorCode := C.NewOnigRegex(patternCharPtr, C.int(len(re.pattern)), C.int(option), &re.regex, &re.encoding, &re.errorInfo, &re.errorBuf) if errorCode != C.ONIG_NORMAL { return re, errors.New(C.GoString(re.errorBuf)) } re.numCaptures = int32(C.onig_number_of_captures(re.regex)) + 1 re.namedGroupInfo = re.getNamedGroupInfo() runtime.SetFinalizer(re, (*Regexp).Free) return re, nil } func Compile(str string) (*Regexp, error) { return NewRegexp(str, ONIG_OPTION_DEFAULT) } func MustCompile(str string) *Regexp { regexp, error := NewRegexp(str, ONIG_OPTION_DEFAULT) if error != nil { panic("regexp: compiling " + str + ": " + error.Error()) } return regexp } func CompileWithOption(str string, option int) (*Regexp, error) { return NewRegexp(str, option) } func MustCompileWithOption(str string, option int) *Regexp { regexp, error := NewRegexp(str, option) if error != nil { panic("regexp: compiling " + str + ": " + error.Error()) } return regexp } // MustCompileASCII is equivalent to MustCompile, but with the encoding restricted to ASCII. 
func MustCompileASCII(str string) *Regexp { regexp, error := NewRegexpASCII(str, ONIG_OPTION_DEFAULT) if error != nil { panic("regexp: compiling " + str + ": " + error.Error()) } return regexp } func (re *Regexp) Free() { mutex.Lock() if re.regex != nil { C.onig_free(re.regex) re.regex = nil } mutex.Unlock() if re.errorInfo != nil { C.free(unsafe.Pointer(re.errorInfo)) re.errorInfo = nil } if re.errorBuf != nil { C.free(unsafe.Pointer(re.errorBuf)) re.errorBuf = nil } } func (re *Regexp) getNamedGroupInfo() NamedGroupInfo { numNamedGroups := int(C.onig_number_of_names(re.regex)) // when any named capture exists, there is no numbered capture even if // there are unnamed captures. if numNamedGroups == 0 { return nil } namedGroupInfo := make(map[string]int) //try to get the names bufferSize := len(re.pattern) * 2 nameBuffer := make([]byte, bufferSize) groupNumbers := make([]int32, numNamedGroups) bufferPtr := unsafe.Pointer(&nameBuffer[0]) numbersPtr := unsafe.Pointer(&groupNumbers[0]) length := int(C.GetCaptureNames(re.regex, bufferPtr, (C.int)(bufferSize), (*C.int)(numbersPtr))) if length == 0 { panic(fmt.Errorf("could not get the capture group names from %q", re.String())) } namesAsBytes := bytes.Split(nameBuffer[:length], ([]byte)(";")) if len(namesAsBytes) != numNamedGroups { panic(fmt.Errorf( "the number of named groups (%d) does not match the number names found (%d)", numNamedGroups, len(namesAsBytes), )) } for i, nameAsBytes := range namesAsBytes { name := string(nameAsBytes) namedGroupInfo[name] = int(groupNumbers[i]) } return namedGroupInfo } func (re *Regexp) find(b []byte, n int, offset int) []int { match := make([]int, re.numCaptures*2) if n == 0 { b = []byte{0} } bytesPtr := unsafe.Pointer(&b[0]) // captures contains two pairs of ints, start and end, so we need list // twice the size of the capture groups. 
captures := make([]C.int, re.numCaptures*2) capturesPtr := unsafe.Pointer(&captures[0]) var numCaptures int32 numCapturesPtr := unsafe.Pointer(&numCaptures) pos := int(C.SearchOnigRegex( bytesPtr, C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), re.regex, re.errorInfo, (*C.char)(nil), (*C.int)(capturesPtr), (*C.int)(numCapturesPtr), )) if pos < 0 { return nil } if numCaptures <= 0 { panic("cannot have 0 captures when processing a match") } if re.numCaptures != numCaptures { panic(fmt.Errorf("expected %d captures but got %d", re.numCaptures, numCaptures)) } for i := range captures { match[i] = int(captures[i]) } return match } func getCapture(b []byte, beg int, end int) []byte { if beg < 0 || end < 0 { return nil } return b[beg:end] } func (re *Regexp) match(b []byte, n int, offset int) bool { if n == 0 { b = []byte{0} } bytesPtr := unsafe.Pointer(&b[0]) pos := int(C.SearchOnigRegex( bytesPtr, C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), re.regex, re.errorInfo, nil, nil, nil, )) return pos >= 0 } func (re *Regexp) findAll(b []byte, n int) [][]int { if n < 0 { n = len(b) } capture := make([][]int, 0, numMatchStartSize) var offset int for offset <= n { match := re.find(b, n, offset) if match == nil { break } capture = append(capture, match) // move offset to the ending index of the current match and prepare to // find the next non-overlapping match. offset = match[1] // if match[0] == match[1], it means the current match does not advance // the search. we need to exit the loop to avoid getting stuck here. 
if match[0] == match[1] { if offset < n && offset >= 0 { //there are more bytes, so move offset by a word _, width := utf8.DecodeRune(b[offset:]) offset += width } else { //search is over, exit loop break } } } return capture } func (re *Regexp) FindIndex(b []byte) []int { match := re.find(b, len(b), 0) if len(match) == 0 { return nil } return match[:2] } func (re *Regexp) Find(b []byte) []byte { loc := re.FindIndex(b) if loc == nil { return nil } return getCapture(b, loc[0], loc[1]) } func (re *Regexp) FindString(s string) string { mb := re.Find([]byte(s)) if mb == nil { return "" } return string(mb) } func (re *Regexp) FindStringIndex(s string) []int { return re.FindIndex([]byte(s)) } func (re *Regexp) FindAllIndex(b []byte, n int) [][]int { matches := re.findAll(b, n) if len(matches) == 0 { return nil } return matches } func (re *Regexp) FindAll(b []byte, n int) [][]byte { matches := re.FindAllIndex(b, n) if matches == nil { return nil } matchBytes := make([][]byte, 0, len(matches)) for _, match := range matches { matchBytes = append(matchBytes, getCapture(b, match[0], match[1])) } return matchBytes } func (re *Regexp) FindAllString(s string, n int) []string { b := []byte(s) matches := re.FindAllIndex(b, n) if matches == nil { return nil } matchStrings := make([]string, 0, len(matches)) for _, match := range matches { m := getCapture(b, match[0], match[1]) if m == nil { matchStrings = append(matchStrings, "") } else { matchStrings = append(matchStrings, string(m)) } } return matchStrings } func (re *Regexp) FindAllStringIndex(s string, n int) [][]int { return re.FindAllIndex([]byte(s), n) } func (re *Regexp) FindSubmatchIndex(b []byte) []int { match := re.find(b, len(b), 0) if len(match) == 0 { return nil } return match } func (re *Regexp) FindSubmatch(b []byte) [][]byte { match := re.FindSubmatchIndex(b) if match == nil { return nil } length := len(match) / 2 if length == 0 { return nil } results := make([][]byte, 0, length) for i := 0; i < length; i++ { 
results = append(results, getCapture(b, match[2*i], match[2*i+1])) } return results } func (re *Regexp) FindStringSubmatch(s string) []string { b := []byte(s) match := re.FindSubmatchIndex(b) if match == nil { return nil } length := len(match) / 2 if length == 0 { return nil } results := make([]string, 0, length) for i := 0; i < length; i++ { cap := getCapture(b, match[2*i], match[2*i+1]) if cap == nil { results = append(results, "") } else { results = append(results, string(cap)) } } return results } func (re *Regexp) FindStringSubmatchIndex(s string) []int { return re.FindSubmatchIndex([]byte(s)) } func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int { matches := re.findAll(b, n) if len(matches) == 0 { return nil } return matches } func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte { matches := re.findAll(b, n) if len(matches) == 0 { return nil } allCapturedBytes := make([][][]byte, 0, len(matches)) for _, match := range matches { length := len(match) / 2 capturedBytes := make([][]byte, 0, length) for i := 0; i < length; i++ { capturedBytes = append(capturedBytes, getCapture(b, match[2*i], match[2*i+1])) } allCapturedBytes = append(allCapturedBytes, capturedBytes) } return allCapturedBytes } func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string { b := []byte(s) matches := re.findAll(b, n) if len(matches) == 0 { return nil } allCapturedStrings := make([][]string, 0, len(matches)) for _, match := range matches { length := len(match) / 2 capturedStrings := make([]string, 0, length) for i := 0; i < length; i++ { cap := getCapture(b, match[2*i], match[2*i+1]) if cap == nil { capturedStrings = append(capturedStrings, "") } else { capturedStrings = append(capturedStrings, string(cap)) } } allCapturedStrings = append(allCapturedStrings, capturedStrings) } return allCapturedStrings } func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int { return re.FindAllSubmatchIndex([]byte(s), n) } func (re *Regexp) Match(b []byte) 
bool { return re.match(b, len(b), 0) } func (re *Regexp) MatchString(s string) bool { return re.Match([]byte(s)) } func (re *Regexp) NumSubexp() int { return (int)(C.onig_number_of_captures(re.regex)) } func fillCapturedValues(repl []byte, _ []byte, capturedBytes map[string][]byte) []byte { replLen := len(repl) newRepl := make([]byte, 0, replLen*3) groupName := make([]byte, 0, replLen) var inGroupNameMode, inEscapeMode bool for index := 0; index < replLen; index++ { ch := repl[index] if inGroupNameMode && ch == byte('<') { } else if inGroupNameMode && ch == byte('>') { inGroupNameMode = false capBytes := capturedBytes[string(groupName)] newRepl = append(newRepl, capBytes...) groupName = groupName[:0] //reset the name } else if inGroupNameMode { groupName = append(groupName, ch) } else if inEscapeMode && ch <= byte('9') && byte('1') <= ch { capNumStr := string(ch) capBytes := capturedBytes[capNumStr] newRepl = append(newRepl, capBytes...) } else if inEscapeMode && ch == byte('k') && (index+1) < replLen && repl[index+1] == byte('<') { inGroupNameMode = true inEscapeMode = false index++ //bypass the next char '<' } else if inEscapeMode { newRepl = append(newRepl, '\\') newRepl = append(newRepl, ch) } else if ch != '\\' { newRepl = append(newRepl, ch) } if ch == byte('\\') || inEscapeMode { inEscapeMode = !inEscapeMode } } return newRepl } func (re *Regexp) replaceAll(src, repl []byte, replFunc func([]byte, []byte, map[string][]byte) []byte) []byte { srcLen := len(src) matches := re.findAll(src, srcLen) if len(matches) == 0 { return src } dest := make([]byte, 0, srcLen) for i, match := range matches { length := len(match) / 2 capturedBytes := make(map[string][]byte) if re.namedGroupInfo == nil { for j := 0; j < length; j++ { capturedBytes[strconv.Itoa(j)] = getCapture(src, match[2*j], match[2*j+1]) } } else { for name, j := range re.namedGroupInfo { capturedBytes[name] = getCapture(src, match[2*j], match[2*j+1]) } } matchBytes := getCapture(src, match[0], match[1]) 
newRepl := replFunc(repl, matchBytes, capturedBytes) prevEnd := 0 if i > 0 { prevMatch := matches[i-1][:2] prevEnd = prevMatch[1] } if match[0] > prevEnd && prevEnd >= 0 && match[0] <= srcLen { dest = append(dest, src[prevEnd:match[0]]...) } dest = append(dest, newRepl...) } lastEnd := matches[len(matches)-1][1] if lastEnd < srcLen && lastEnd >= 0 { dest = append(dest, src[lastEnd:]...) } return dest } func (re *Regexp) ReplaceAll(src, repl []byte) []byte { return re.replaceAll(src, repl, fillCapturedValues) } func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { return re.replaceAll(src, nil, func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte { return repl(matchBytes) }) } func (re *Regexp) ReplaceAllString(src, repl string) string { return string(re.ReplaceAll([]byte(src), []byte(repl))) } func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string { return string(re.replaceAll([]byte(src), nil, func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte { return []byte(repl(string(matchBytes))) })) } func (re *Regexp) String() string { return re.pattern } func growBuffer(b []byte, offset int, n int) []byte { if offset+n > cap(b) { buf := make([]byte, 2*cap(b)+n) copy(buf, b[:offset]) return buf } return b } func fromReader(r io.RuneReader) []byte { b := make([]byte, numReadBufferStartSize) var offset int for { rune, runeWidth, err := r.ReadRune() if err != nil { break } b = growBuffer(b, offset, runeWidth) writeWidth := utf8.EncodeRune(b[offset:], rune) if runeWidth != writeWidth { panic("reading rune width not equal to the written rune width") } offset += writeWidth } return b[:offset] } func (re *Regexp) FindReaderIndex(r io.RuneReader) []int { b := fromReader(r) return re.FindIndex(b) } func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int { b := fromReader(r) return re.FindSubmatchIndex(b) } func (re *Regexp) MatchReader(r io.RuneReader) bool { b := fromReader(r) return re.Match(b) } 
func (re *Regexp) LiteralPrefix() (prefix string, complete bool) { //no easy way to implement this return "", false } func MatchString(pattern string, s string) (matched bool, error error) { re, err := Compile(pattern) if err != nil { return false, err } return re.MatchString(s), nil } func (re *Regexp) Gsub(src, repl string) string { return string(re.replaceAll([]byte(src), []byte(repl), fillCapturedValues)) } func (re *Regexp) GsubFunc(src string, replFunc func(string, map[string]string) string) string { replaced := re.replaceAll([]byte(src), nil, func(_ []byte, matchBytes []byte, capturedBytes map[string][]byte) []byte { capturedStrings := make(map[string]string) for name, capBytes := range capturedBytes { capturedStrings[name] = string(capBytes) } matchString := string(matchBytes) return ([]byte)(replFunc(matchString, capturedStrings)) }, ) return string(replaced) } go-oniguruma-1.2.1/regex_test.go000066400000000000000000000746701365460066600166560ustar00rootroot00000000000000// Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
package rubex import ( "errors" "fmt" "runtime" "strings" "testing" ) var good_re = []string{ ``, `.`, `^.$`, `a`, `a*`, `a+`, `a?`, `a|b`, `a*|b*`, `(a*|b)(c*|d)`, `[a-z]`, `[a-abc-c\-\]\[]`, `[a-z]+`, //`[]`, //this is not considered as good by ruby/javascript regex `[abc]`, `[^1234]`, `[^\n]`, `\!\\`, } type stringError struct { re string err error } var bad_re = []stringError{ {`*`, errors.New("target of repeat operator is not specified")}, {`+`, errors.New("target of repeat operator is not specified")}, {`?`, errors.New("target of repeat operator is not specified")}, {`(abc`, errors.New("end pattern with unmatched parenthesis")}, {`abc)`, errors.New("unmatched close parenthesis")}, {`x[a-z`, errors.New("premature end of char-class")}, //{`abc]`, Err}, //this is not considered as bad by ruby/javascript regex; nor are the following commented out regex patterns {`abc[`, errors.New("premature end of char-class")}, {`[z-a]`, errors.New("empty range in char class")}, {`abc\`, errors.New("end pattern at escape")}, //{`a**`, Err}, //{`a*+`, Err}, //{`a??`, Err}, //{`\x`, Err}, } func runParallel(testFunc func(chan bool), concurrency int) { runtime.GOMAXPROCS(4) done := make(chan bool, concurrency) for i := 0; i < concurrency; i++ { go testFunc(done) } for i := 0; i < concurrency; i++ { <-done <-done } runtime.GOMAXPROCS(1) } const numConcurrentRuns = 200 func compileTest(t *testing.T, expr string, error error) *Regexp { re, err := Compile(expr) if (error == nil && err != error) || (error != nil && err.Error() != error.Error()) { t.Error("compiling `", expr, "`; unexpected error: ", err.Error()) } return re } func TestGoodCompile(t *testing.T) { testFunc := func(done chan bool) { done <- false for i := 0; i < len(good_re); i++ { compileTest(t, good_re[i], nil) } done <- true } runParallel(testFunc, numConcurrentRuns) } func TestBadCompile(t *testing.T) { for i := 0; i < len(bad_re); i++ { compileTest(t, bad_re[i].re, bad_re[i].err) } } func matchTest(t *testing.T, test 
*FindTest) { re := compileTest(t, test.pat, nil) if re == nil { return } m := re.MatchString(test.text) if m != (len(test.matches) > 0) { t.Errorf("MatchString failure on %s: %t should be %t", test.pat, m, len(test.matches) > 0) } // now try bytes m = re.Match([]byte(test.text)) if m != (len(test.matches) > 0) { t.Errorf("Match failure on %s: %t should be %t", test.pat, m, len(test.matches) > 0) } } func TestMatch(t *testing.T) { for _, test := range findTests { matchTest(t, &test) } } func matchFunctionTest(t *testing.T, test *FindTest) { m, err := MatchString(test.pat, test.text) if err == nil { return } if m != (len(test.matches) > 0) { t.Errorf("Match failure on %s: %t should be %t", test, m, len(test.matches) > 0) } } func TestMatchFunction(t *testing.T) { for _, test := range findTests { matchFunctionTest(t, &test) } } type ReplaceTest struct { pattern, replacement, input, output string } var replaceTests = []ReplaceTest{ // Test empty input and/or replacement, with pattern that matches the empty string. {"", "", "", ""}, {"", "x", "", "x"}, {"", "", "abc", "abc"}, {"", "x", "abc", "xaxbxcx"}, // Test empty input and/or replacement, with pattern that does not match the empty string. {"b", "", "", ""}, {"b", "x", "", ""}, {"b", "", "abc", "ac"}, {"b", "x", "abc", "axc"}, {"y", "", "", ""}, {"y", "x", "", ""}, {"y", "", "abc", "abc"}, {"y", "x", "abc", "abc"}, // Multibyte characters -- verify that we don't try to match in the middle // of a character. {"[a-c]*", "x", "\u65e5", "x\u65e5x"}, {"[^\u65e5]", "x", "abc\u65e5def", "xxx\u65e5xxx"}, // Start and end of a string. 
{"^[a-c]*", "x", "abcdabc", "xdabc"}, {"[a-c]*$", "x", "abcdabc", "abcdxx"}, {"^[a-c]*$", "x", "abcdabc", "abcdabc"}, {"^[a-c]*", "x", "abc", "x"}, {"[a-c]*$", "x", "abc", "xx"}, {"^[a-c]*$", "x", "abc", "x"}, {"^[a-c]*", "x", "dabce", "xdabce"}, {"[a-c]*$", "x", "dabce", "dabcex"}, {"^[a-c]*$", "x", "dabce", "dabce"}, {"^[a-c]*", "x", "", "x"}, {"[a-c]*$", "x", "", "x"}, {"^[a-c]*$", "x", "", "x"}, {"^[a-c]+", "x", "abcdabc", "xdabc"}, {"[a-c]+$", "x", "abcdabc", "abcdx"}, {"^[a-c]+$", "x", "abcdabc", "abcdabc"}, {"^[a-c]+", "x", "abc", "x"}, {"[a-c]+$", "x", "abc", "x"}, {"^[a-c]+$", "x", "abc", "x"}, {"^[a-c]+", "x", "dabce", "dabce"}, {"[a-c]+$", "x", "dabce", "dabce"}, {"^[a-c]+$", "x", "dabce", "dabce"}, {"^[a-c]+", "x", "", ""}, {"[a-c]+$", "x", "", ""}, {"^[a-c]+$", "x", "", ""}, // Other cases. {"abc", "def", "abcdefg", "defdefg"}, {"bc", "BC", "abcbcdcdedef", "aBCBCdcdedef"}, {"abc", "", "abcdabc", "d"}, {"x", "xXx", "xxxXxxx", "xXxxXxxXxXxXxxXxxXx"}, {"abc", "d", "", ""}, {"abc", "d", "abc", "d"}, {".+", "x", "abc", "x"}, {"[a-c]*", "x", "def", "xdxexfx"}, {"[a-c]+", "x", "abcbcdcdedef", "xdxdedef"}, {"[a-c]*", "x", "abcbcdcdedef", "xxdxxdxexdxexfx"}, {"(foo)*bar(s)", "\\1", "bars", ""}, } type ReplaceFuncTest struct { pattern string replacement func(string) string input, output string } var replaceFuncTests = []ReplaceFuncTest{ {"[a-c]", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxayxbyxcydef"}, {"[a-c]+", func(s string) string { return "x" + s + "y" }, "defabcdef", "defxabcydef"}, {"[a-c]*", func(s string) string { return "x" + s + "y" }, "defabcdef", "xydxyexyfxabcyxydxyexyfxy"}, } func TestReplaceAll(t *testing.T) { for _, tc := range replaceTests { re, err := Compile(tc.pattern) if err != nil { t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) continue } actual := re.ReplaceAllString(tc.input, tc.replacement) if actual != tc.output { t.Errorf("%q.Replace(%q,%q) = %q; want %q", tc.pattern, tc.input, tc.replacement, 
actual, tc.output) } // now try bytes actual = string(re.ReplaceAll([]byte(tc.input), []byte(tc.replacement))) if actual != tc.output { t.Errorf("%q.Replace(%q,%q) = %q; want %q", tc.pattern, tc.input, tc.replacement, actual, tc.output) } } } func TestReplaceAllFunc(t *testing.T) { for _, tc := range replaceFuncTests { re, err := Compile(tc.pattern) if err != nil { t.Errorf("Unexpected error compiling %q: %v", tc.pattern, err) continue } actual := re.ReplaceAllStringFunc(tc.input, tc.replacement) if actual != tc.output { t.Errorf("%q.ReplaceFunc(%q) = %q; want %q", tc.pattern, tc.input, actual, tc.output) } // now try bytes actual = string(re.ReplaceAllFunc([]byte(tc.input), func(s []byte) []byte { return []byte(tc.replacement(string(s))) })) if actual != tc.output { t.Errorf("%q.ReplaceFunc(%q) = %q; want %q", tc.pattern, tc.input, actual, tc.output) } } } /* * "hallo".gsub(/h(.*)llo/, "e") */ func TestGsub1(t *testing.T) { input := "hallo" pattern := "h(.*)llo" expected := "e" re, err := Compile(pattern) if err != nil { t.Errorf("Unexpected error compiling %q: %v", pattern, err) return } actual := re.Gsub(input, "e") if actual != expected { t.Errorf("expected %q, actual %q\n", expected, actual) } } /* * "hallo".gsub(/h(?.*)llo/, "\\k") */ func TestGsubNamedCapture1(t *testing.T) { input := "hallo" pattern := "h(?.*)llo" expected := "a" re, err := Compile(pattern) if err != nil { t.Errorf("Unexpected error compiling %q: %v", pattern, err) return } actual := re.Gsub(input, "\\k") if actual != expected { t.Errorf("expected %q, actual %q\n", expected, actual) } } /* * "hallo".gsub(/h(?.*)ll(?.*)/, "\\k\\k\\k") */ func TestGsubNamedCapture2(t *testing.T) { input := "hallo" pattern := "h(?.*)ll(?.*)" expected := "aoa" re, err := Compile(pattern) if err != nil { t.Errorf("Unexpected error compiling %q: %v", pattern, err) return } actual := re.Gsub(input, "\\k\\k\\k") if actual != expected { t.Errorf("expected %q, actual %q\n", expected, actual) } } /* * 
"hallo".gsub(/h(?.*)(l*)(?.*)/, "\\k\\k\\k\\1") */ func TestGsubNamedCapture3(t *testing.T) { input := "hallo" pattern := "h(?.*)(l*)(?.*)" expected := "alloallo" re, err := Compile(pattern) if err != nil { t.Errorf("Unexpected error compiling %q: %v", pattern, err) return } actual := re.Gsub(input, "\\k\\k\\k\\1") if actual != expected { t.Errorf("expected %q, actual %q\n", expected, actual) } } /* * "hallo".gsub(/h(?.*)(l*)(?.*)/, "\\k\\k\\k\\1") */ func TestGsubNamedCapture4(t *testing.T) { input := "The lamb was sure to go." pattern := "(?[^\\s\\.]+)(?\\s)" expected := "They lamby wasy surey toy go." re, err := Compile(pattern) if err != nil { t.Errorf("Unexpected error compiling %q: %v", pattern, err) return } actual := re.GsubFunc(input, func(_ string, captures map[string]string) string { return captures["word"] + "y" + captures["white_space"] }) if actual != expected { t.Errorf("expected %q, actual %q\n", expected, actual) } } /* * "hallo".gsub(/h(.*)llo/) { |match| * "e" * } */ func TestGsubFunc1(t *testing.T) { input := "hallo" pattern := "h(.*)llo" expected := "e" re, err := Compile(pattern) if err != nil { t.Errorf("Unexpected error compiling %q: %v", pattern, err) return } actual := re.GsubFunc(input, func(match string, captures map[string]string) string { return "e" }) if actual != expected { t.Errorf("expected %q, actual %q\n", expected, actual) } } /* * @env = {} * "hallo".gsub(/h(.*)llo/) { |match| * $~.captures.each_with_index do |arg, index| * @env["#{index + 1}"] = arg * "abcd".gsub(/(d)/) do * env["1"] * end * end * } */ func TestGsubFunc2(t *testing.T) { input := "hallo" pattern := "h(.*)llo" expected := "abca" env := make(map[string]string) re, err := Compile(pattern) if err != nil { t.Errorf("Unexpected error compiling %q: %v", pattern, err) return } actual := re.GsubFunc(input, func(_ string, captures map[string]string) string { for name, capture := range captures { env[name] = capture } re1 := MustCompile("(d)") return re1.GsubFunc("abcd", 
func(_ string, captures2 map[string]string) string { return env["1"] }) }) if actual != expected { t.Errorf("expected %q, actual %q\n", expected, actual) } } /* how to match $ as itself */ func TestPattern1(t *testing.T) { re := MustCompile(`b\$a`) if !re.MatchString("b$a") { t.Errorf("expect to match\n") } re = MustCompile("b\\$a") if !re.MatchString("b$a") { t.Errorf("expect to match 2\n") } } /* how to use $ as the end of line */ func TestPattern2(t *testing.T) { re := MustCompile("a$") if !re.MatchString("a") { t.Errorf("expect to match\n") } if re.MatchString("ab") { t.Errorf("expect to mismatch\n") } } func TestCompileWithOption(t *testing.T) { re := MustCompileWithOption("a$", ONIG_OPTION_IGNORECASE) if !re.MatchString("A") { t.Errorf("expect to match\n") } re = MustCompile("a$") if re.MatchString("A") { t.Errorf("expect to mismatch\n") } } type MetaTest struct { pattern, output, literal string isLiteral bool } var metaTests = []MetaTest{ {``, ``, ``, true}, {`foo`, `foo`, `foo`, true}, {`foo\.\$`, `foo\\\.\\\$`, `foo.$`, true}, // has meta but no operator {`foo.\$`, `foo\.\\\$`, `foo`, false}, // has escaped operators and real operators {`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[{\]}\\\|,<\.>/\?~`, `!@#`, false}, } func TestQuoteMeta(t *testing.T) { for _, tc := range metaTests { // Verify that QuoteMeta returns the expected string. quoted := QuoteMeta(tc.pattern) if quoted != tc.output { t.Errorf("QuoteMeta(`%s`) = `%s`; want `%s`", tc.pattern, quoted, tc.output) continue } // Verify that the quoted string is in fact treated as expected // by Compile -- i.e. that it matches the original, unquoted string. 
if tc.pattern != "" { re, err := Compile(quoted) if err != nil { t.Errorf("Unexpected error compiling QuoteMeta(`%s`): %v", tc.pattern, err) continue } src := "abc" + tc.pattern + "def" repl := "xyz" replaced := re.ReplaceAllString(src, repl) expected := "abcxyzdef" if replaced != expected { t.Errorf("QuoteMeta(`%s`).Replace(`%s`,`%s`) = `%s`; want `%s`", tc.pattern, src, repl, replaced, expected) } } } } type numSubexpCase struct { input string expected int } var numSubexpCases = []numSubexpCase{ {``, 0}, {`.*`, 0}, {`abba`, 0}, {`ab(b)a`, 1}, {`ab(.*)a`, 1}, {`(.*)ab(.*)a`, 2}, {`(.*)(ab)(.*)a`, 3}, {`(.*)((a)b)(.*)a`, 4}, {`(.*)(\(ab)(.*)a`, 3}, {`(.*)(\(a\)b)(.*)a`, 3}, } func TestNumSubexp(t *testing.T) { for _, c := range numSubexpCases { re := MustCompile(c.input) n := re.NumSubexp() if n != c.expected { t.Errorf("NumSubexp for %q returned %d, expected %d", c.input, n, c.expected) } } } // For each pattern/text pair, what is the expected output of each function? // We can derive the textual results from the indexed results, the non-submatch // results from the submatched results, the single results from the 'all' results, // and the byte results from the string results. Therefore the table includes // only the FindAllStringSubmatchIndex result. 
// FindTest is one entry of the find table: a pattern, the text to search and
// the expected match/submatch index pairs for every match (nil when the
// pattern must not match).
type FindTest struct {
	pat     string
	text    string
	matches [][]int
}

// String formats the pattern and text of the entry for failure messages.
func (t FindTest) String() string {
	return fmt.Sprintf("pattern: %#q text: %#q", t.pat, t.text)
}

var findTests = []FindTest{
	{``, ``, build(1, 0, 0)},
	{`^abcdefg`, "abcdefg", build(1, 0, 7)},
	{`a+`, "baaab", build(1, 1, 4)},
	{"abcd..", "abcdef", build(1, 0, 6)},
	{`a`, "a", build(1, 0, 1)},
	{`x`, "y", nil},
	{`b`, "abc", build(1, 1, 2)},
	{`.`, "a", build(1, 0, 1)},
	{`.*`, "abcdef", build(2, 0, 6, 6, 6)},
	{`^`, "abcde", build(1, 0, 0)},
	{`$`, "abcde", build(1, 5, 5)},
	{`^abcd$`, "abcd", build(1, 0, 4)},
	{`^bcd'`, "abcdef", nil},
	{`^abcd$`, "abcde", nil},
	{`a+`, "baaab", build(1, 1, 4)},
	{`a*`, "baaab", build(4, 0, 0, 1, 4, 4, 4, 5, 5)},
	{`[a-z]+`, "abcd", build(1, 0, 4)},
	{`[^a-z]+`, "ab1234cd", build(1, 2, 6)},
	{`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)},
	{`[^\n]+`, "abcd\n", build(1, 0, 4)},
	{`[日本語]+`, "日本語日本語", build(1, 0, 18)},
	{`日本語+`, "日本語", build(1, 0, 9)},
	{`a*`, "日本語", build(4, 0, 0, 3, 3, 6, 6, 9, 9)},
	{`日本語+`, "日本語語語語", build(1, 0, 18)},
	{`()`, "", build(1, 0, 0, 0, 0)},
	{`(a)`, "a", build(1, 0, 1, 0, 1)},
	{`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)},
	{`(.*)`, "", build(1, 0, 0, 0, 0)},
	{`(.*)`, "abcd", build(2, 0, 4, 0, 4, 4, 4, 4, 4)},
	{`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)},
	{`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
	{`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
	{`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
	{"\a\b\f\n\r\t\v", "\a\b\f\n\r\t\v", build(1, 0, 7)},
	{`[\a\b\f\n\r\t\v]+`, "\a\b\f\n\r\t\v", build(1, 0, 7)},

	//{`a*(|(b))c*`, "aacc", build(2, 0, 4, 4, 4)},
	{`(.*).*`, "ab", build(2, 0, 2, 0, 2, 2, 2, 2, 2)},
	{`[.]`, ".", build(1, 0, 1)},
	{`/$`, "/abc/", build(1, 4, 5)},
	{`/$`, "/abc", nil},

	// multiple matches
	{`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)},
	{`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)},
	{`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)},
	{`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)},
	{`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)},

	// fixed bugs
	{`ab$`, "cab", build(1, 1, 3)},
	{`axxb$`, "axxcb", nil},
	{`data`, "daXY data", build(1, 5, 9)},
	{`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
	{`zx+`, "zzx", build(1, 1, 3)},

	// can backslash-escape any punctuation
	{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
	{`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
	{"\\`", "`", build(1, 0, 1)},
	{"[\\`]+", "`", build(1, 0, 1)},

	// long set of matches (longer than startSize)
	{
		".",
		"qwertyuiopasdfghjklzxcvbnm1234567890",
		build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36),
	},
}

// build is a helper to construct a [][]int by extracting n sequences from x.
// This represents n matches with len(x)/n submatches each.
//
// It panics when n is not positive or when len(x) is not a multiple of n, so
// that a malformed table entry is caught instead of being silently truncated.
func build(n int, x ...int) [][]int {
	if n <= 0 || len(x)%n != 0 {
		panic("invalid build entry")
	}
	runLength := len(x) / n
	ret := make([][]int, n)
	for i := range ret {
		ret[i] = make([]int, runLength)
		copy(ret[i], x[i*runLength:])
	}
	return ret
}

// First the simple cases.
func TestFind(t *testing.T) { for _, test := range findTests { re := MustCompile(test.pat) if re.String() != test.pat { t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat) } result := re.Find([]byte(test.text)) switch { case len(test.matches) == 0 && len(result) == 0: // ok case test.matches == nil && result != nil: t.Errorf("expected no match; got one: %s", test) case test.matches != nil && result == nil: t.Errorf("expected match; got none: %s", test) case test.matches != nil && result != nil: expect := test.text[test.matches[0][0]:test.matches[0][1]] if expect != string(result) { t.Errorf("expected %q got %q: %s", expect, result, test) } } } } func TestFindString(t *testing.T) { for _, test := range findTests { result := MustCompile(test.pat).FindString(test.text) switch { case len(test.matches) == 0 && len(result) == 0: // ok case test.matches == nil && result != "": t.Errorf("expected no match; got one: %s", test) case test.matches != nil && result == "": // Tricky because an empty result has two meanings: no match or empty match. 
if test.matches[0][0] != test.matches[0][1] { t.Errorf("expected match; got none: %s", test) } case test.matches != nil && result != "": expect := test.text[test.matches[0][0]:test.matches[0][1]] if expect != result { t.Errorf("expected %q got %q: %s", expect, result, test) } } } } func testFindIndex(test *FindTest, result []int, t *testing.T) { switch { case len(test.matches) == 0 && len(result) == 0: // ok case test.matches == nil && result != nil: t.Errorf("expected no match; got one: %s", test) case test.matches != nil && result == nil: t.Errorf("expected match; got none: %s", test) case test.matches != nil && result != nil: expect := test.matches[0] if expect[0] != result[0] || expect[1] != result[1] { t.Errorf("expected %v got %v: %s", expect, result, test) } } } func TestFindIndex(t *testing.T) { for _, test := range findTests { testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t) } } func TestFindStringIndex(t *testing.T) { for _, test := range findTests { testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t) } } func TestFindStringContentType(t *testing.T) { pattern := `text/(.*);\s*charset\s*=\s*(.*)` regex := MustCompile(pattern) data1 := "text/html; charset=utf8" data2 := "text/;charset=iso-8859-1" data3 := "image/png" matches := regex.FindStringSubmatch(data1) if matches[1] != "html" || matches[2] != "utf8" { t.Errorf("does not match content-type 1") } matches = regex.FindStringSubmatch(data2) if matches[1] != "" || matches[2] != "iso-8859-1" { println(matches[1]) println(matches[2]) t.Errorf("does not match content-type 2") } matches = regex.FindStringSubmatch(data3) if len(matches) != 0 { t.Errorf("does not match content-type 3") } } func TestFindReaderIndex(t *testing.T) { for _, test := range findTests { testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t) } } // Now come the simple All cases. 
func TestFindAll(t *testing.T) { for _, test := range findTests { result := MustCompile(test.pat).FindAll([]byte(test.text), -1) switch { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: t.Errorf("expected no match; got one: %s", test) case test.matches != nil && result == nil: t.Errorf("expected match; got none: %s", test) case test.matches != nil && result != nil: if len(test.matches) != len(result) { t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) continue } for k, e := range test.matches { expect := test.text[e[0]:e[1]] if expect != string(result[k]) { t.Errorf("match %d: expected %q got %q: %s", k, expect, result[k], test) } } } } } func TestFindAllString(t *testing.T) { for _, test := range findTests { result := MustCompile(test.pat).FindAllString(test.text, -1) switch { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: t.Errorf("expected no match; got one: %s", test) case test.matches != nil && result == nil: t.Errorf("expected match; got none: %s", test) case test.matches != nil && result != nil: if len(test.matches) != len(result) { t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) continue } for k, e := range test.matches { expect := test.text[e[0]:e[1]] if expect != result[k] { t.Errorf("expected %q got %q: %s", expect, result, test) } } } } } func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) { switch { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: t.Errorf("expected no match; got one: %s", test) case test.matches != nil && result == nil: t.Errorf("expected match; got none: %s", test) case test.matches != nil && result != nil: if len(test.matches) != len(result) { t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) return } for k, e := range test.matches { if e[0] != result[k][0] || e[1] != result[k][1] { 
t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test) } } } } func TestFindAllIndex(t *testing.T) { for _, test := range findTests { testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t) } } func TestFindAllStringIndex(t *testing.T) { for _, test := range findTests { testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t) } } // Now come the Submatch cases. func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) { if len(submatches) != len(result)*2 { t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test) return } for k := 0; k < len(submatches); k += 2 { if submatches[k] == -1 { if result[k/2] != nil { t.Errorf("match %d: expected nil got %q: %s", n, result, test) } continue } expect := test.text[submatches[k]:submatches[k+1]] if expect != string(result[k/2]) { t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test) return } } } func TestFindSubmatch(t *testing.T) { for _, test := range findTests { result := MustCompile(test.pat).FindSubmatch([]byte(test.text)) switch { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: t.Errorf("expected no match; got one: %s", test) case test.matches != nil && result == nil: t.Errorf("expected match; got none: %s", test) case test.matches != nil && result != nil: testSubmatchBytes(&test, 0, test.matches[0], result, t) } } } func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) { if len(submatches) != len(result)*2 { t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test) return } for k := 0; k < len(submatches); k += 2 { if submatches[k] == -1 { if result[k/2] != "" { t.Errorf("match %d: expected nil got %q: %s", n, result, test) } continue } expect := test.text[submatches[k]:submatches[k+1]] if expect != result[k/2] { t.Errorf("match %d: 
expected %q got %q: %s", n, expect, result, test) return } } } func TestFindStringSubmatch(t *testing.T) { for _, test := range findTests { result := MustCompile(test.pat).FindStringSubmatch(test.text) switch { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: t.Errorf("expected no match; got one: %s", test) case test.matches != nil && result == nil: t.Errorf("expected match; got none: %s", test) case test.matches != nil && result != nil: testSubmatchString(&test, 0, test.matches[0], result, t) } } } func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) { if len(expect) != len(result) { t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test) return } for k, e := range expect { if e != result[k] { t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test) } } } func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) { switch { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: t.Errorf("expected no match; got one: %s", test) case test.matches != nil && result == nil: t.Errorf("expected match; got none: %s", test) case test.matches != nil && result != nil: testSubmatchIndices(test, 0, test.matches[0], result, t) } } func TestFindSubmatchIndex(t *testing.T) { for _, test := range findTests { testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t) } } func TestFindStringSubmatchIndex(t *testing.T) { for _, test := range findTests { testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t) } } func TestFindReaderSubmatchIndex(t *testing.T) { for _, test := range findTests { testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t) } } // Now come the monster AllSubmatch cases. 
func TestFindAllSubmatch(t *testing.T) { for _, test := range findTests { result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1) switch { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: t.Errorf("expected no match; got one: %s", test) case test.matches != nil && result == nil: t.Errorf("expected match; got none: %s", test) case len(test.matches) != len(result): t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) case test.matches != nil && result != nil: for k, match := range test.matches { testSubmatchBytes(&test, k, match, result[k], t) } } } } func TestFindAllStringSubmatch(t *testing.T) { for _, test := range findTests { result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1) switch { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: t.Errorf("expected no match; got one: %s", test) case test.matches != nil && result == nil: t.Errorf("expected match; got none: %s", test) case len(test.matches) != len(result): t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) case test.matches != nil && result != nil: for k, match := range test.matches { testSubmatchString(&test, k, match, result[k], t) } } } } func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) { switch { case test.matches == nil && result == nil: // ok case test.matches == nil && result != nil: t.Errorf("expected no match; got one: %s", test) case test.matches != nil && result == nil: t.Errorf("expected match; got none: %s", test) case len(test.matches) != len(result): t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test) case test.matches != nil && result != nil: for k, match := range test.matches { testSubmatchIndices(test, k, match, result[k], t) } } } func TestFindAllSubmatchIndex(t *testing.T) { for _, test := range findTests { testFindAllSubmatchIndex(&test, 
MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t) } } func TestFindAllStringSubmatchIndex(t *testing.T) { for _, test := range findTests { testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t) } } func BenchmarkLiteral(b *testing.B) { x := strings.Repeat("x", 50) + "y" b.StopTimer() re := MustCompile("y") b.StartTimer() for i := 0; i < b.N; i++ { if !re.MatchString(x) { println("no match!") break } } } func BenchmarkNotLiteral(b *testing.B) { x := strings.Repeat("x", 50) + "y" b.StopTimer() re := MustCompile(".y") b.StartTimer() for i := 0; i < b.N; i++ { if !re.MatchString(x) { println("no match!") break } } } func BenchmarkMatchClass(b *testing.B) { b.StopTimer() x := strings.Repeat("xxxx", 20) + "w" re := MustCompile("[abcdw]") b.StartTimer() for i := 0; i < b.N; i++ { if !re.MatchString(x) { println("no match!") break } } } func BenchmarkMatchClass_InRange(b *testing.B) { b.StopTimer() // 'b' is between 'a' and 'c', so the charclass // range checking is no help here. x := strings.Repeat("bbbb", 20) + "c" re := MustCompile("[ac]") b.StartTimer() for i := 0; i < b.N; i++ { if !re.MatchString(x) { println("no match!") break } } } func BenchmarkReplaceAll(b *testing.B) { x := "abcdefghijklmnopqrstuvwxyz" b.StopTimer() re := MustCompile("[cjrw]") b.StartTimer() for i := 0; i < b.N; i++ { re.ReplaceAllString(x, "") } } func BenchmarkFindAllStringSubmatchIndex(b *testing.B) { x := "abcdefghijklmnopqrstuvwxyz" b.StopTimer() re := MustCompile("[cjrw]") b.StartTimer() for i := 0; i < b.N; i++ { re.FindAllStringSubmatchIndex(x, 0) } } func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) { b.StopTimer() x := []byte("abcdefghijklmnopqrstuvwxyz") re := MustCompile("^zbc(d|e)") b.StartTimer() for i := 0; i < b.N; i++ { re.Match(x) } } func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) { b.StopTimer() x := []byte("abcdefghijklmnopqrstuvwxyz") for i := 0; i < 15; i++ { x = append(x, x...) 
} re := MustCompile("^zbc(d|e)") b.StartTimer() for i := 0; i < b.N; i++ { re.Match(x) } } func BenchmarkAnchoredShortMatch(b *testing.B) { b.StopTimer() x := []byte("abcdefghijklmnopqrstuvwxyz") re := MustCompile("^.bc(d|e)") b.StartTimer() for i := 0; i < b.N; i++ { re.Match(x) } } func BenchmarkAnchoredLongMatch(b *testing.B) { b.StopTimer() x := []byte("abcdefghijklmnopqrstuvwxyz") for i := 0; i < 15; i++ { x = append(x, x...) } re := MustCompile("^.bc(d|e)") b.StartTimer() for i := 0; i < b.N; i++ { re.Match(x) } }