pax_global_header00006660000000000000000000000064135653737520014532gustar00rootroot0000000000000052 comment=874a673206bf7a5c86d29876dfeaccca0922ab62 xstrings-1.2.1/000077500000000000000000000000001356537375200134145ustar00rootroot00000000000000xstrings-1.2.1/.gitignore000066400000000000000000000004121356537375200154010ustar00rootroot00000000000000# Compiled Object files, Static and Dynamic libs (Shared Objects) *.o *.a *.so # Folders _obj _test # Architecture specific extensions/prefixes *.[568vq] [568vq].out *.cgo1.go *.cgo2.c _cgo_defun.c _cgo_gotypes.go _cgo_export.* _testmain.go *.exe *.test *.prof xstrings-1.2.1/.travis.yml000066400000000000000000000005431356537375200155270ustar00rootroot00000000000000language: go install: - go get golang.org/x/tools/cmd/cover - go get github.com/mattn/goveralls script: - go test -v -covermode=count -coverprofile=coverage.out - 'if [ "$TRAVIS_PULL_REQUEST" = "false" ] && [ ! -z "$COVERALLS_TOKEN" ]; then $HOME/gopath/bin/goveralls -coverprofile=coverage.out -service=travis-ci -repotoken $COVERALLS_TOKEN; fi' xstrings-1.2.1/CONTRIBUTING.md000066400000000000000000000025731356537375200156540ustar00rootroot00000000000000# Contributing # Thanks for your contribution in advance. No matter what you will contribute to this project, pull request or bug report or feature discussion, it's always highly appreciated. ## New API or feature ## I want to speak more about how to add new functions to this package. Package `xstrings` is a collection of useful string functions which should be implemented in Go. It's a bit subjective to say which function should be included and which should not. I set up the following rules in order to make it clear and as objective as possible. * Rule 1: Only string algorithm, which takes string as input, can be included. * Rule 2: If a function has been implemented in package `strings`, it must not be included. * Rule 3: If a function is not language neutral, it must not be included. 
* Rule 4: If a function is a part of standard library in other languages, it can be included. * Rule 5: If a function is quite useful in some famous framework or library, it can be included. New function must be discussed in project issues before submitting any code. If a pull request with new functions is sent without any ref issue, it will be rejected. ## Pull request ## Pull request is always welcome. Just make sure you have run `go fmt` and all test cases passed before submit. If the pull request is to add a new API or feature, don't forget to update README.md and add new API in function list. xstrings-1.2.1/LICENSE000066400000000000000000000020631356537375200144220ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2015 Huan Du Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
xstrings-1.2.1/README.md000066400000000000000000000240511356537375200146750ustar00rootroot00000000000000# xstrings # [![Build Status](https://travis-ci.org/huandu/xstrings.svg?branch=master)](https://travis-ci.org/huandu/xstrings) [![GoDoc](https://godoc.org/github.com/huandu/xstrings?status.svg)](https://godoc.org/github.com/huandu/xstrings) [![Go Report](https://goreportcard.com/badge/github.com/huandu/xstrings)](https://goreportcard.com/report/github.com/huandu/xstrings) [![Coverage Status](https://coveralls.io/repos/github/huandu/xstrings/badge.svg?branch=master)](https://coveralls.io/github/huandu/xstrings?branch=master) Go package [xstrings](https://godoc.org/github.com/huandu/xstrings) is a collection of string functions, which are widely used in other languages but absent in Go package [strings](http://golang.org/pkg/strings). All functions are well tested and carefully tuned for performance. ## Propose a new function ## Please review [contributing guideline](CONTRIBUTING.md) and [create new issue](https://github.com/huandu/xstrings/issues) to state why it should be included. ## Install ## Use `go get` to install this library. go get github.com/huandu/xstrings ## API document ## See [GoDoc](https://godoc.org/github.com/huandu/xstrings) for full document. ## Function list ## Go functions have a unique naming style. One, who has experience in other language but new in Go, may have difficulties to find out right string function to use. Here is a list of functions in [strings](http://golang.org/pkg/strings) and [xstrings](https://godoc.org/github.com/huandu/xstrings) with enough extra information about how to map these functions to their friends in other languages. Hope this list could be helpful for fresh gophers. 
### Package `xstrings` functions ### *Keep this table sorted by Function in ascending order.* | Function | Friends | # | | -------- | ------- | --- | | [Center](https://godoc.org/github.com/huandu/xstrings#Center) | `str.center` in Python; `String#center` in Ruby | [#30](https://github.com/huandu/xstrings/issues/30) | | [Count](https://godoc.org/github.com/huandu/xstrings#Count) | `String#count` in Ruby | [#16](https://github.com/huandu/xstrings/issues/16) | | [Delete](https://godoc.org/github.com/huandu/xstrings#Delete) | `String#delete` in Ruby | [#17](https://github.com/huandu/xstrings/issues/17) | | [ExpandTabs](https://godoc.org/github.com/huandu/xstrings#ExpandTabs) | `str.expandtabs` in Python | [#27](https://github.com/huandu/xstrings/issues/27) | | [FirstRuneToLower](https://godoc.org/github.com/huandu/xstrings#FirstRuneToLower) | `lcfirst` in PHP or Perl | [#15](https://github.com/huandu/xstrings/issues/15) | | [FirstRuneToUpper](https://godoc.org/github.com/huandu/xstrings#FirstRuneToUpper) | `String#capitalize` in Ruby; `ucfirst` in PHP or Perl | [#15](https://github.com/huandu/xstrings/issues/15) | | [Insert](https://godoc.org/github.com/huandu/xstrings#Insert) | `String#insert` in Ruby | [#18](https://github.com/huandu/xstrings/issues/18) | | [LastPartition](https://godoc.org/github.com/huandu/xstrings#LastPartition) | `str.rpartition` in Python; `String#rpartition` in Ruby | [#19](https://github.com/huandu/xstrings/issues/19) | | [LeftJustify](https://godoc.org/github.com/huandu/xstrings#LeftJustify) | `str.ljust` in Python; `String#ljust` in Ruby | [#28](https://github.com/huandu/xstrings/issues/28) | | [Len](https://godoc.org/github.com/huandu/xstrings#Len) | `mb_strlen` in PHP | [#23](https://github.com/huandu/xstrings/issues/23) | | [Partition](https://godoc.org/github.com/huandu/xstrings#Partition) | `str.partition` in Python; `String#partition` in Ruby | [#10](https://github.com/huandu/xstrings/issues/10) | | 
[Reverse](https://godoc.org/github.com/huandu/xstrings#Reverse) | `String#reverse` in Ruby; `strrev` in PHP; `reverse` in Perl | [#7](https://github.com/huandu/xstrings/issues/7) | | [RightJustify](https://godoc.org/github.com/huandu/xstrings#RightJustify) | `str.rjust` in Python; `String#rjust` in Ruby | [#29](https://github.com/huandu/xstrings/issues/29) | | [RuneWidth](https://godoc.org/github.com/huandu/xstrings#RuneWidth) | - | [#27](https://github.com/huandu/xstrings/issues/27) | | [Scrub](https://godoc.org/github.com/huandu/xstrings#Scrub) | `String#scrub` in Ruby | [#20](https://github.com/huandu/xstrings/issues/20) | | [Shuffle](https://godoc.org/github.com/huandu/xstrings#Shuffle) | `str_shuffle` in PHP | [#13](https://github.com/huandu/xstrings/issues/13) | | [ShuffleSource](https://godoc.org/github.com/huandu/xstrings#ShuffleSource) | `str_shuffle` in PHP | [#13](https://github.com/huandu/xstrings/issues/13) | | [Slice](https://godoc.org/github.com/huandu/xstrings#Slice) | `mb_substr` in PHP | [#9](https://github.com/huandu/xstrings/issues/9) | | [Squeeze](https://godoc.org/github.com/huandu/xstrings#Squeeze) | `String#squeeze` in Ruby | [#11](https://github.com/huandu/xstrings/issues/11) | | [Successor](https://godoc.org/github.com/huandu/xstrings#Successor) | `String#succ` or `String#next` in Ruby | [#22](https://github.com/huandu/xstrings/issues/22) | | [SwapCase](https://godoc.org/github.com/huandu/xstrings#SwapCase) | `str.swapcase` in Python; `String#swapcase` in Ruby | [#12](https://github.com/huandu/xstrings/issues/12) | | [ToCamelCase](https://godoc.org/github.com/huandu/xstrings#ToCamelCase) | `String#camelize` in RoR | [#1](https://github.com/huandu/xstrings/issues/1) | | [ToKebabCase](https://godoc.org/github.com/huandu/xstrings#ToKebabCase) | - | [#41](https://github.com/huandu/xstrings/issues/41) | | [ToSnakeCase](https://godoc.org/github.com/huandu/xstrings#ToSnakeCase) | `String#underscore` in RoR | 
[#1](https://github.com/huandu/xstrings/issues/1) | | [Translate](https://godoc.org/github.com/huandu/xstrings#Translate) | `str.translate` in Python; `String#tr` in Ruby; `strtr` in PHP; `tr///` in Perl | [#21](https://github.com/huandu/xstrings/issues/21) | | [Width](https://godoc.org/github.com/huandu/xstrings#Width) | `mb_strwidth` in PHP | [#26](https://github.com/huandu/xstrings/issues/26) | | [WordCount](https://godoc.org/github.com/huandu/xstrings#WordCount) | `str_word_count` in PHP | [#14](https://github.com/huandu/xstrings/issues/14) | | [WordSplit](https://godoc.org/github.com/huandu/xstrings#WordSplit) | - | [#14](https://github.com/huandu/xstrings/issues/14) | ### Package `strings` functions ### *Keep this table sorted by Function in ascending order.* | Function | Friends | | -------- | ------- | | [Contains](http://golang.org/pkg/strings/#Contains) | `String#include?` in Ruby | | [ContainsAny](http://golang.org/pkg/strings/#ContainsAny) | - | | [ContainsRune](http://golang.org/pkg/strings/#ContainsRune) | - | | [Count](http://golang.org/pkg/strings/#Count) | `str.count` in Python; `substr_count` in PHP | | [EqualFold](http://golang.org/pkg/strings/#EqualFold) | `stricmp` in PHP; `String#casecmp` in Ruby | | [Fields](http://golang.org/pkg/strings/#Fields) | `str.split` in Python; `split` in Perl; `String#split` in Ruby | | [FieldsFunc](http://golang.org/pkg/strings/#FieldsFunc) | - | | [HasPrefix](http://golang.org/pkg/strings/#HasPrefix) | `str.startswith` in Python; `String#start_with?` in Ruby | | [HasSuffix](http://golang.org/pkg/strings/#HasSuffix) | `str.endswith` in Python; `String#end_with?` in Ruby | | [Index](http://golang.org/pkg/strings/#Index) | `str.index` in Python; `String#index` in Ruby; `strpos` in PHP; `index` in Perl | | [IndexAny](http://golang.org/pkg/strings/#IndexAny) | - | | [IndexByte](http://golang.org/pkg/strings/#IndexByte) | - | | [IndexFunc](http://golang.org/pkg/strings/#IndexFunc) | - | | 
[IndexRune](http://golang.org/pkg/strings/#IndexRune) | - | | [Join](http://golang.org/pkg/strings/#Join) | `str.join` in Python; `Array#join` in Ruby; `implode` in PHP; `join` in Perl | | [LastIndex](http://golang.org/pkg/strings/#LastIndex) | `str.rindex` in Python; `String#rindex`; `strrpos` in PHP; `rindex` in Perl | | [LastIndexAny](http://golang.org/pkg/strings/#LastIndexAny) | - | | [LastIndexFunc](http://golang.org/pkg/strings/#LastIndexFunc) | - | | [Map](http://golang.org/pkg/strings/#Map) | `String#each_codepoint` in Ruby | | [Repeat](http://golang.org/pkg/strings/#Repeat) | operator `*` in Python and Ruby; `str_repeat` in PHP | | [Replace](http://golang.org/pkg/strings/#Replace) | `str.replace` in Python; `String#sub` in Ruby; `str_replace` in PHP | | [Split](http://golang.org/pkg/strings/#Split) | `str.split` in Python; `String#split` in Ruby; `explode` in PHP; `split` in Perl | | [SplitAfter](http://golang.org/pkg/strings/#SplitAfter) | - | | [SplitAfterN](http://golang.org/pkg/strings/#SplitAfterN) | - | | [SplitN](http://golang.org/pkg/strings/#SplitN) | `str.split` in Python; `String#split` in Ruby; `explode` in PHP; `split` in Perl | | [Title](http://golang.org/pkg/strings/#Title) | `str.title` in Python | | [ToLower](http://golang.org/pkg/strings/#ToLower) | `str.lower` in Python; `String#downcase` in Ruby; `strtolower` in PHP; `lc` in Perl | | [ToLowerSpecial](http://golang.org/pkg/strings/#ToLowerSpecial) | - | | [ToTitle](http://golang.org/pkg/strings/#ToTitle) | - | | [ToTitleSpecial](http://golang.org/pkg/strings/#ToTitleSpecial) | - | | [ToUpper](http://golang.org/pkg/strings/#ToUpper) | `str.upper` in Python; `String#upcase` in Ruby; `strtoupper` in PHP; `uc` in Perl | | [ToUpperSpecial](http://golang.org/pkg/strings/#ToUpperSpecial) | - | | [Trim](http://golang.org/pkg/strings/#Trim) | `str.strip` in Python; `String#strip` in Ruby; `trim` in PHP | | [TrimFunc](http://golang.org/pkg/strings/#TrimFunc) | - | | 
[TrimLeft](http://golang.org/pkg/strings/#TrimLeft) | `str.lstrip` in Python; `String#lstrip` in Ruby; `ltrim` in PHP | | [TrimLeftFunc](http://golang.org/pkg/strings/#TrimLeftFunc) | - | | [TrimPrefix](http://golang.org/pkg/strings/#TrimPrefix) | - | | [TrimRight](http://golang.org/pkg/strings/#TrimRight) | `str.rstrip` in Python; `String#rstrip` in Ruby; `rtrim` in PHP | | [TrimRightFunc](http://golang.org/pkg/strings/#TrimRightFunc) | - | | [TrimSpace](http://golang.org/pkg/strings/#TrimSpace) | `str.strip` in Python; `String#strip` in Ruby; `trim` in PHP | | [TrimSuffix](http://golang.org/pkg/strings/#TrimSuffix) | `String#chomp` in Ruby; `chomp` in Perl | ## License ## This library is licensed under MIT license. See LICENSE for details. xstrings-1.2.1/common.go000066400000000000000000000010331356537375200152300ustar00rootroot00000000000000// Copyright 2015 Huan Du. All rights reserved. // Licensed under the MIT license that can be found in the LICENSE file. package xstrings import ( "bytes" ) const bufferMaxInitGrowSize = 2048 // Lazy initialize a buffer. func allocBuffer(orig, cur string) *bytes.Buffer { output := &bytes.Buffer{} maxSize := len(orig) * 4 // Avoid to reserve too much memory at once. if maxSize > bufferMaxInitGrowSize { maxSize = bufferMaxInitGrowSize } output.Grow(maxSize) output.WriteString(orig[:len(orig)-len(cur)]) return output } xstrings-1.2.1/convert.go000066400000000000000000000204521356537375200154260ustar00rootroot00000000000000// Copyright 2015 Huan Du. All rights reserved. // Licensed under the MIT license that can be found in the LICENSE file. package xstrings import ( "bytes" "math/rand" "unicode" "unicode/utf8" ) // ToCamelCase can convert all lower case characters behind underscores // to upper case character. // Underscore character will be removed in result except following cases. // * More than 1 underscore. // "a__b" => "A_B" // * At the beginning of string. // "_a" => "_A" // * At the end of string. 
// "ab_" => "Ab_" func ToCamelCase(str string) string { if len(str) == 0 { return "" } buf := &bytes.Buffer{} var r0, r1 rune var size int // leading '_' will appear in output. for len(str) > 0 { r0, size = utf8.DecodeRuneInString(str) str = str[size:] if r0 != '_' { r0 = unicode.ToUpper(r0) break } buf.WriteRune(r0) } if len(str) == 0 { // A special case for a string contains only 1 rune. if size != 0 { buf.WriteRune(r0) } return buf.String() } for len(str) > 0 { r1 = r0 r0, size = utf8.DecodeRuneInString(str) str = str[size:] if r1 == '_' && r0 == '_' { buf.WriteRune(r1) continue } if r1 == '_' { r0 = unicode.ToUpper(r0) } else { r0 = unicode.ToLower(r0) } if r1 != '_' { buf.WriteRune(r1) } } buf.WriteRune(r0) return buf.String() } // ToSnakeCase can convert all upper case characters in a string to // snake case format. // // Some samples. // "FirstName" => "first_name" // "HTTPServer" => "http_server" // "NoHTTPS" => "no_https" // "GO_PATH" => "go_path" // "GO PATH" => "go_path" // space is converted to underscore. // "GO-PATH" => "go_path" // hyphen is converted to underscore. // "HTTP2XX" => "http_2xx" // insert an underscore before a number and after an alphabet. // "http2xx" => "http_2xx" // "HTTP20xOK" => "http_20x_ok" func ToSnakeCase(str string) string { return camelCaseToLowerCase(str, '_') } // ToKebabCase can convert all upper case characters in a string to // kebab case format. // // Some samples. // "FirstName" => "first-name" // "HTTPServer" => "http-server" // "NoHTTPS" => "no-https" // "GO_PATH" => "go-path" // "GO PATH" => "go-path" // space is converted to '-'. // "GO-PATH" => "go-path" // hyphen is converted to '-'. // "HTTP2XX" => "http-2xx" // insert a '-' before a number and after an alphabet. 
//     "http2xx"      => "http-2xx"
//     "HTTP20xOK"    => "http-20x-ok"
func ToKebabCase(str string) string {
	return camelCaseToLowerCase(str, '-')
}

// camelCaseToLowerCase lower-cases str and joins its words with connector.
// It is the shared implementation of ToSnakeCase (connector '_') and
// ToKebabCase (connector '-').
//
// A word boundary is an upper case rune, a digit following a non-digit, or
// one of the separators ' ', '-' and '_'; separators are replaced by
// connector. A run of two or more upper case runes is treated as an
// abbreviation followed by a normal word, so "HTTPServer" becomes
// "http" + connector + "server".
func camelCaseToLowerCase(str string, connector rune) string {
	if len(str) == 0 {
		return ""
	}

	buf := &bytes.Buffer{}
	var prev, r0, r1 rune
	var size int

	// Start as if a connector had just been written so that no connector is
	// inserted in front of the first rune.
	r0 = connector

	for len(str) > 0 {
		prev = r0
		r0, size = utf8.DecodeRuneInString(str)
		str = str[size:]

		switch {
		case r0 == utf8.RuneError:
			buf.WriteRune(r0)

		case unicode.IsUpper(r0):
			if prev != connector && !unicode.IsNumber(prev) {
				buf.WriteRune(connector)
			}

			buf.WriteRune(unicode.ToLower(r0))

			// Note: every break below leaves the switch (or the inner
			// loop), never the outer loop.
			if len(str) == 0 {
				break
			}

			r0, size = utf8.DecodeRuneInString(str)
			str = str[size:]

			if !unicode.IsUpper(r0) {
				// A separator right after a single upper case rune must
				// become the connector too; storing it in r0 also stops the
				// next iteration from inserting a second connector.
				if r0 == '_' || r0 == ' ' || r0 == '-' {
					r0 = connector
				}

				buf.WriteRune(r0)
				break
			}

			// find next non-upper-case character and insert connector properly.
			// it's designed to convert `HTTPServer` to `http_server`.
			// if there are more than 2 adjacent upper case characters in a word,
			// treat them as an abbreviation plus a normal word.
			//
			// Inside the loop r1 is the upper case rune waiting to be
			// written and r0 is the rune just decoded. written tells
			// whether r0 has already been emitted when the loop ends.
			written := false

			for len(str) > 0 {
				r1 = r0
				r0, size = utf8.DecodeRuneInString(str)
				str = str[size:]

				if r0 == utf8.RuneError {
					buf.WriteRune(unicode.ToLower(r1))
					buf.WriteRune(r0)
					written = true
					break
				}

				if !unicode.IsUpper(r0) {
					if r0 == '_' || r0 == ' ' || r0 == '-' {
						// The connector itself is written after the loop.
						r0 = connector

						buf.WriteRune(unicode.ToLower(r1))
					} else if unicode.IsNumber(r0) {
						// treat a number as an upper case rune
						// so that both `http2xx` and `HTTP2XX` can be converted to `http_2xx`.
						buf.WriteRune(unicode.ToLower(r1))
						buf.WriteRune(connector)
						buf.WriteRune(r0)
						written = true
					} else {
						buf.WriteRune(connector)
						buf.WriteRune(unicode.ToLower(r1))
						buf.WriteRune(r0)
						written = true
					}

					break
				}

				buf.WriteRune(unicode.ToLower(r1))
			}

			// Flush r0 only if it is still pending: either the input ended
			// inside the upper case run or r0 is the connector that
			// replaced a separator. Testing len(str) == 0 instead would
			// write the last rune twice for inputs such as "IDs" or "HTTP2".
			if !written {
				buf.WriteRune(unicode.ToLower(r0))
			}

		case unicode.IsNumber(r0):
			if prev != connector && !unicode.IsNumber(prev) {
				buf.WriteRune(connector)
			}

			buf.WriteRune(r0)

		default:
			if r0 == ' ' || r0 == '-' || r0 == '_' {
				r0 = connector
			}

			buf.WriteRune(r0)
		}
	}

	return buf.String()
}

// SwapCase will swap characters case from upper to lower or lower to upper.
func SwapCase(str string) string {
	var out bytes.Buffer

	for _, c := range str {
		if unicode.IsUpper(c) {
			c = unicode.ToLower(c)
		} else if unicode.IsLower(c) {
			c = unicode.ToUpper(c)
		}

		out.WriteRune(c)
	}

	return out.String()
}

// FirstRuneToUpper returns str with its first rune in upper case.
// str itself is returned when it is empty or when its first rune is not a
// lower case rune.
func FirstRuneToUpper(str string) string {
	if len(str) == 0 {
		return str
	}

	first, width := utf8.DecodeRuneInString(str)

	if unicode.IsLower(first) {
		var out bytes.Buffer
		out.WriteRune(unicode.ToUpper(first))
		out.WriteString(str[width:])
		return out.String()
	}

	return str
}

// FirstRuneToLower returns str with its first rune in lower case.
// str itself is returned when it is empty or when its first rune is not an
// upper case rune.
func FirstRuneToLower(str string) string {
	if len(str) == 0 {
		return str
	}

	first, width := utf8.DecodeRuneInString(str)

	if unicode.IsUpper(first) {
		var out bytes.Buffer
		out.WriteRune(unicode.ToLower(first))
		out.WriteString(str[width:])
		return out.String()
	}

	return str
}

// Shuffle returns a copy of str whose runes are put in random order.
// Random numbers come from the default source of `math/rand`.
func Shuffle(str string) string {
	if len(str) == 0 {
		return str
	}

	shuffled := []rune(str)

	// Walk from the last rune down to the second one and swap each with a
	// randomly picked rune at or before its position.
	for last := len(shuffled) - 1; last >= 1; last-- {
		pick := rand.Intn(last + 1)

		if pick == last {
			continue
		}

		shuffled[last], shuffled[pick] = shuffled[pick], shuffled[last]
	}

	return string(shuffled)
}

// ShuffleSource returns a copy of str whose runes are put in random order.
// Random numbers are drawn from src.
func ShuffleSource(str string, src rand.Source) string {
	if len(str) == 0 {
		return str
	}

	shuffled := []rune(str)
	rng := rand.New(src)

	// Same walk as in Shuffle, driven by rng.
	for last := len(shuffled) - 1; last >= 1; last-- {
		pick := rng.Intn(last + 1)

		if pick == last {
			continue
		}

		shuffled[last], shuffled[pick] = shuffled[pick], shuffled[last]
	}

	return string(shuffled)
}

// Successor returns the successor to string.
//
// If there is one alphanumeric rune is found in string, increase the rune by 1.
// If increment generates a "carry", the rune to the left of it is incremented.
// This process repeats until there is no carry, adding an additional rune if necessary. // // If there is no alphanumeric rune, the rightmost rune will be increased by 1 // regardless whether the result is a valid rune or not. // // Only following characters are alphanumeric. // * a - z // * A - Z // * 0 - 9 // // Samples (borrowed from ruby's String#succ document): // "abcd" => "abce" // "THX1138" => "THX1139" // "<>" => "<>" // "1999zzz" => "2000aaa" // "ZZZ9999" => "AAAA0000" // "***" => "**+" func Successor(str string) string { if str == "" { return str } var r rune var i int carry := ' ' runes := []rune(str) l := len(runes) lastAlphanumeric := l for i = l - 1; i >= 0; i-- { r = runes[i] if ('a' <= r && r <= 'y') || ('A' <= r && r <= 'Y') || ('0' <= r && r <= '8') { runes[i]++ carry = ' ' lastAlphanumeric = i break } switch r { case 'z': runes[i] = 'a' carry = 'a' lastAlphanumeric = i case 'Z': runes[i] = 'A' carry = 'A' lastAlphanumeric = i case '9': runes[i] = '0' carry = '0' lastAlphanumeric = i } } // Needs to add one character for carry. if i < 0 && carry != ' ' { buf := &bytes.Buffer{} buf.Grow(l + 4) // Reserve enough space for write. if lastAlphanumeric != 0 { buf.WriteString(str[:lastAlphanumeric]) } buf.WriteRune(carry) for _, r = range runes[lastAlphanumeric:] { buf.WriteRune(r) } return buf.String() } // No alphanumeric character. Simply increase last rune's value. if lastAlphanumeric == l { runes[l-1]++ } return string(runes) } xstrings-1.2.1/convert_test.go000066400000000000000000000112551356537375200164660ustar00rootroot00000000000000// Copyright 2015 Huan Du. All rights reserved. // Licensed under the MIT license that can be found in the LICENSE file. 
package xstrings

import (
	"sort"
	"strings"
	"testing"
)

// TestToSnakeCaseAndToKebabCase checks ToSnakeCase against a table of
// expectations, then reuses the same table for ToKebabCase after replacing
// every '_' in the expected values with '-'.
func TestToSnakeCaseAndToKebabCase(t *testing.T) {
	cases := _M{
		"HTTPServer":         "http_server",
		"_camelCase":         "_camel_case",
		"NoHTTPS":            "no_https",
		"Wi_thF":             "wi_th_f",
		"_AnotherTES_TCaseP": "_another_tes_t_case_p",
		"ALL":                "all",
		"_HELLO_WORLD_":      "_hello_world_",
		"HELLO_WORLD":        "hello_world",
		"HELLO____WORLD":     "hello____world",
		"TW":                 "tw",
		"_C":                 "_c",
		"http2xx":            "http_2xx",
		"HTTP2XX":            "http_2xx",
		"HTTP20xOK":          "http_20x_ok",
		"HTTP20XStatus":      "http_20x_status",
		"HTTP-20xStatus":     "http_20x_status",
		"a":                  "a",

		// Two spaces on each side: every space becomes one connector.
		"  sentence case  ": "__sentence_case__",
		" Mixed-hyphen case _and SENTENCE_case and UPPER-case": "_mixed_hyphen_case__and_sentence_case_and_upper_case",

		"": "",
		"Abc\uFFFDE\uFFFDf\uFFFDd\uFFFD2\uFFFD00Z\uFFFDZZ\uFFFDZZ": "abc\uFFFD_e\uFFFDf\uFFFDd\uFFFD_2\uFFFD_00z\uFFFD_zz\uFFFD_zz",
	}

	runTestCases(t, ToSnakeCase, cases)

	// Kebab case expectations differ from snake case ones only by connector.
	for k, v := range cases {
		cases[k] = strings.Replace(v, "_", "-", -1)
	}

	runTestCases(t, ToKebabCase, cases)
}

// TestToCamelCase checks ToCamelCase, including kept leading, trailing and
// doubled underscores.
func TestToCamelCase(t *testing.T) {
	runTestCases(t, ToCamelCase, _M{
		"http_server":     "HttpServer",
		"_camel_case":     "_CamelCase",
		"no_https":        "NoHttps",
		"_complex__case_": "_Complex_Case_",
		"all":             "All",
		"GOLANG_IS_GREAT": "GolangIsGreat",
		"GOLANG":          "Golang",
		"a":               "A",
		"好":               "好",
		"":                "",
	})
}

// TestSwapCase checks SwapCase with ASCII, Greek and CJK runes.
func TestSwapCase(t *testing.T) {
	runTestCases(t, SwapCase, _M{
		"swapCase": "SWAPcASE",
		"Θ~λa云Ξπ":  "θ~ΛA云ξΠ",
		"a":        "A",
		"":         "",
	})
}

// TestFirstRuneToUpper checks that only a lower case first rune is changed.
func TestFirstRuneToUpper(t *testing.T) {
	runTestCases(t, FirstRuneToUpper, _M{
		"hello, world!": "Hello, world!",
		"Hello, world!": "Hello, world!",
		"你好,世界!":       "你好,世界!",
		"a":             "A",
		"":              "",
	})
}

// TestFirstRuneToLower checks that only an upper case first rune is changed.
func TestFirstRuneToLower(t *testing.T) {
	runTestCases(t, FirstRuneToLower, _M{
		"hello, world!": "hello, world!",
		"Hello, world!": "hello, world!",
		"你好,世界!":       "你好,世界!",
		"a":             "a",
		"A":             "a",
		"":              "",
	})
}

func TestShuffle(t *testing.T) {
	// It seems there is no reliable way to test shuffled string.
	// Runner just make sure shuffled string has the same runes as origin string.
runner := func(str string) string { s := Shuffle(str) slice := sort.StringSlice(strings.Split(s, "")) slice.Sort() return strings.Join(slice, "") } runTestCases(t, runner, _M{ "": "", "facgbheidjk": "abcdefghijk", "尝试中文": "中尝文试", "zh英文hun排": "hhnuz排文英", }) } type testShuffleSource int // A generated random number sequance just for testing. var testShuffleTable = []int64{ 1874068156324778273, 3328451335138149956, 5263531936693774911, 7955079406183515637, 2703501726821866378, 2740103009342231109, 6941261091797652072, 1905388747193831650, 7981306761429961588, 6426100070888298971, 4831389563158288344, 261049867304784443, 1460320609597786623, 5600924393587988459, 8995016276575641803, 732830328053361739, 5486140987150761883, 545291762129038907, 6382800227808658932, 2781055864473387780, 1598098976185383115, 4990765271833742716, 5018949295715050020, 2568779411109623071, 3902890183311134652, 4893789450120281907, 2338498362660772719, 2601737961087659062, 7273596521315663110, 3337066551442961397, 8121576815539813105, 2740376916591569721, 8249030965139585917, 898860202204764712, 9010467728050264449, 685213522303989579, 2050257992909156333, 6281838661429879825, 2227583514184312746, 2873287401706343734, } func (src testShuffleSource) Int63() int64 { n := testShuffleTable[int(src)%len(testShuffleTable)] src++ return n } func (src testShuffleSource) Seed(int64) {} func TestShuffleSource(t *testing.T) { var src testShuffleSource runner := func(str string) string { return ShuffleSource(str, src) } runTestCases(t, runner, _M{ "": "", "facgbheidjk": "bakefjgichd", "尝试中文怎么样": "怎试中样尝么文", "zh英文hun排": "hh英nzu文排", }) } func TestSuccessor(t *testing.T) { runTestCases(t, Successor, _M{ "": "", "abcd": "abce", "THX1138": "THX1139", "<>": "<>", "1999zzz": "2000aaa", "ZZZ9999": "AAAA0000", "***": "**+", "来点中文试试": "来点中文试诖", "中cZ英ZZ文zZ混9zZ9杂99进z位": "中dA英AA文aA混0aA0杂00进a位", }) } xstrings-1.2.1/count.go000066400000000000000000000043761356537375200151050ustar00rootroot00000000000000// Copyright 2015 
Huan Du. All rights reserved. // Licensed under the MIT license that can be found in the LICENSE file. package xstrings import ( "unicode" "unicode/utf8" ) // Len returns str's utf8 rune length. func Len(str string) int { return utf8.RuneCountInString(str) } // WordCount returns number of words in a string. // // Word is defined as a locale dependent string containing alphabetic characters, // which may also contain but not start with `'` and `-` characters. func WordCount(str string) int { var r rune var size, n int inWord := false for len(str) > 0 { r, size = utf8.DecodeRuneInString(str) switch { case isAlphabet(r): if !inWord { inWord = true n++ } case inWord && (r == '\'' || r == '-'): // Still in word. default: inWord = false } str = str[size:] } return n } const minCJKCharacter = '\u3400' // Checks r is a letter but not CJK character. func isAlphabet(r rune) bool { if !unicode.IsLetter(r) { return false } switch { // Quick check for non-CJK character. case r < minCJKCharacter: return true // Common CJK characters. case r >= '\u4E00' && r <= '\u9FCC': return false // Rare CJK characters. case r >= '\u3400' && r <= '\u4D85': return false // Rare and historic CJK characters. case r >= '\U00020000' && r <= '\U0002B81D': return false } return true } // Width returns string width in monotype font. // Multi-byte characters are usually twice the width of single byte characters. // // Algorithm comes from `mb_strwidth` in PHP. // http://php.net/manual/en/function.mb-strwidth.php func Width(str string) int { var r rune var size, n int for len(str) > 0 { r, size = utf8.DecodeRuneInString(str) n += RuneWidth(r) str = str[size:] } return n } // RuneWidth returns character width in monotype font. // Multi-byte characters are usually twice the width of single byte characters. // // Algorithm comes from `mb_strwidth` in PHP. 
// http://php.net/manual/en/function.mb-strwidth.php func RuneWidth(r rune) int { switch { case r == utf8.RuneError || r < '\x20': return 0 case '\x20' <= r && r < '\u2000': return 1 case '\u2000' <= r && r < '\uFF61': return 2 case '\uFF61' <= r && r < '\uFFA0': return 1 case '\uFFA0' <= r: return 2 } return 0 } xstrings-1.2.1/count_test.go000066400000000000000000000022211356537375200161270ustar00rootroot00000000000000// Copyright 2015 Huan Du. All rights reserved. // Licensed under the MIT license that can be found in the LICENSE file. package xstrings import ( "fmt" "testing" ) func TestLen(t *testing.T) { runner := func(str string) string { return fmt.Sprint(Len(str)) } runTestCases(t, runner, _M{ "abcdef": "6", "中文": "2", "中yin文hun排": "9", "": "0", }) } func TestWordCount(t *testing.T) { runner := func(str string) string { return fmt.Sprint(WordCount(str)) } runTestCases(t, runner, _M{ "one word: λ": "3", "中文": "0", "你好,sekai!": "1", "oh, it's super-fancy!!a": "4", "": "0", "-": "0", "it's-'s": "1", }) } func TestWidth(t *testing.T) { runner := func(str string) string { return fmt.Sprint(Width(str)) } runTestCases(t, runner, _M{ "abcd\t0123\n7890": "12", "中zh英eng文混排": "15", "": "0", }) } func TestRuneWidth(t *testing.T) { runner := func(str string) string { return fmt.Sprint(RuneWidth([]rune(str)[0])) } runTestCases(t, runner, _M{ "a": "1", "中": "2", "\x11": "0", }) } xstrings-1.2.1/doc.go000066400000000000000000000006001356537375200145040ustar00rootroot00000000000000// Copyright 2015 Huan Du. All rights reserved. // Licensed under the MIT license that can be found in the LICENSE file. // Package xstrings is to provide string algorithms which are useful but not included in `strings` package. // See project home page for details. https://github.com/huandu/xstrings // // Package xstrings assumes all strings are encoded in utf8. 
package xstrings

xstrings-1.2.1/format.go000066400000000000000000000100741356537375200152350ustar00rootroot00000000000000// Copyright 2015 Huan Du. All rights reserved.
// Licensed under the MIT license that can be found in the LICENSE file.

package xstrings

import (
	"bytes"
	"unicode/utf8"
)

// ExpandTabs replaces every tab ('\t') in str with one or more spaces so that
// the text after the tab starts at the next multiple of tabSize columns.
// The column number is reset to zero after each newline ('\n') occurring in the str.
//
// ExpandTabs uses RuneWidth to decide rune's width.
// For example, CJK characters will be treated as two characters.
//
// If tabSize <= 0, ExpandTabs panics with error.
//
// Samples:
//     ExpandTabs("a\tbc\tdef\tghij\tk", 4) => "a   bc  def ghij    k"
//     ExpandTabs("abcdefg\thij\nk\tl", 4)  => "abcdefg hij\nk   l"
//     ExpandTabs("z中\t文\tw", 4)           => "z中 文  w"
func ExpandTabs(str string, tabSize int) string {
	if tabSize <= 0 {
		panic("tab size must be positive")
	}

	var (
		expanded *bytes.Buffer // stays nil until the first tab is found.
		col      int
	)

	rest := str

	for len(rest) > 0 {
		c, width := utf8.DecodeRuneInString(rest)

		switch c {
		case '\t':
			// The buffer is created at the first tab only, so a string
			// without any tab is returned as is.
			if expanded == nil {
				expanded = allocBuffer(str, rest)
			}

			fill := tabSize - col%tabSize

			for n := fill; n > 0; n-- {
				expanded.WriteByte(byte(' '))
			}

			col += fill

		case '\n':
			col = 0

			if expanded != nil {
				expanded.WriteRune(c)
			}

		default:
			col += RuneWidth(c)

			if expanded != nil {
				expanded.WriteRune(c)
			}
		}

		rest = rest[width:]
	}

	if expanded == nil {
		return str
	}

	return expanded.String()
}

// LeftJustify returns a string with pad string at right side if str's rune length is smaller than length.
// If str's rune length is larger than length, str itself will be returned.
//
// If pad is an empty string, str will be returned.
// // Samples: // LeftJustify("hello", 4, " ") => "hello" // LeftJustify("hello", 10, " ") => "hello " // LeftJustify("hello", 10, "123") => "hello12312" func LeftJustify(str string, length int, pad string) string { l := Len(str) if l >= length || pad == "" { return str } remains := length - l padLen := Len(pad) output := &bytes.Buffer{} output.Grow(len(str) + (remains/padLen+1)*len(pad)) output.WriteString(str) writePadString(output, pad, padLen, remains) return output.String() } // RightJustify returns a string with pad string at left side if str's rune length is smaller than length. // If str's rune length is larger than length, str itself will be returned. // // If pad is an empty string, str will be returned. // // Samples: // RightJustify("hello", 4, " ") => "hello" // RightJustify("hello", 10, " ") => " hello" // RightJustify("hello", 10, "123") => "12312hello" func RightJustify(str string, length int, pad string) string { l := Len(str) if l >= length || pad == "" { return str } remains := length - l padLen := Len(pad) output := &bytes.Buffer{} output.Grow(len(str) + (remains/padLen+1)*len(pad)) writePadString(output, pad, padLen, remains) output.WriteString(str) return output.String() } // Center returns a string with pad string at both side if str's rune length is smaller than length. // If str's rune length is larger than length, str itself will be returned. // // If pad is an empty string, str will be returned. 
// // Samples: // Center("hello", 4, " ") => "hello" // Center("hello", 10, " ") => " hello " // Center("hello", 10, "123") => "12hello123" func Center(str string, length int, pad string) string { l := Len(str) if l >= length || pad == "" { return str } remains := length - l padLen := Len(pad) output := &bytes.Buffer{} output.Grow(len(str) + (remains/padLen+1)*len(pad)) writePadString(output, pad, padLen, remains/2) output.WriteString(str) writePadString(output, pad, padLen, (remains+1)/2) return output.String() } func writePadString(output *bytes.Buffer, pad string, padLen, remains int) { var r rune var size int repeats := remains / padLen for i := 0; i < repeats; i++ { output.WriteString(pad) } remains = remains % padLen if remains != 0 { for i := 0; i < remains; i++ { r, size = utf8.DecodeRuneInString(pad) output.WriteRune(r) pad = pad[size:] } } } xstrings-1.2.1/format_test.go000066400000000000000000000056111356537375200162750ustar00rootroot00000000000000// Copyright 2015 Huan Du. All rights reserved. // Licensed under the MIT license that can be found in the LICENSE file. 
package xstrings import ( "strconv" "strings" "testing" ) func TestExpandTabs(t *testing.T) { runner := func(str string) (result string) { defer func() { if e := recover(); e != nil { result = e.(string) } }() input := strings.Split(str, separator) n, _ := strconv.Atoi(input[1]) return ExpandTabs(input[0], n) } runTestCases(t, runner, _M{ sep("a\tbc\tdef\tghij\tk", "4"): "a bc def ghij k", sep("abcdefg\thij\nk\tl", "4"): "abcdefg hij\nk l", sep("z中\t文\tw", "4"): "z中 文 w", sep("abcdef", "4"): "abcdef", sep("abc\td\tef\tghij\nk\tl", "3"): "abc d ef ghij\nk l", sep("abc\td\tef\tghij\nk\tl", "1"): "abc d ef ghij\nk l", sep("abc", "0"): "tab size must be positive", sep("abc", "-1"): "tab size must be positive", }) } func TestLeftJustify(t *testing.T) { runner := func(str string) string { input := strings.Split(str, separator) n, _ := strconv.Atoi(input[1]) return LeftJustify(input[0], n, input[2]) } runTestCases(t, runner, _M{ sep("hello", "4", " "): "hello", sep("hello", "10", " "): "hello ", sep("hello", "10", "123"): "hello12312", sep("hello中文test", "4", " "): "hello中文test", sep("hello中文test", "12", " "): "hello中文test ", sep("hello中文test", "18", "测试!"): "hello中文test测试!测试!测", sep("hello中文test", "0", "123"): "hello中文test", sep("hello中文test", "18", ""): "hello中文test", }) } func TestRightJustify(t *testing.T) { runner := func(str string) string { input := strings.Split(str, separator) n, _ := strconv.Atoi(input[1]) return RightJustify(input[0], n, input[2]) } runTestCases(t, runner, _M{ sep("hello", "4", " "): "hello", sep("hello", "10", " "): " hello", sep("hello", "10", "123"): "12312hello", sep("hello中文test", "4", " "): "hello中文test", sep("hello中文test", "12", " "): " hello中文test", sep("hello中文test", "18", "测试!"): "测试!测试!测hello中文test", sep("hello中文test", "0", "123"): "hello中文test", sep("hello中文test", "18", ""): "hello中文test", }) } func TestCenter(t *testing.T) { runner := func(str string) string { input := strings.Split(str, separator) n, _ := strconv.Atoi(input[1]) 
return Center(input[0], n, input[2]) } runTestCases(t, runner, _M{ sep("hello", "4", " "): "hello", sep("hello", "10", " "): " hello ", sep("hello", "10", "123"): "12hello123", sep("hello中文test", "4", " "): "hello中文test", sep("hello中文test", "12", " "): "hello中文test ", sep("hello中文test", "18", "测试!"): "测试!hello中文test测试!测", sep("hello中文test", "0", "123"): "hello中文test", sep("hello中文test", "18", ""): "hello中文test", }) } xstrings-1.2.1/go.mod000066400000000000000000000000531356537375200145200ustar00rootroot00000000000000module github.com/huandu/xstrings go 1.12 xstrings-1.2.1/manipulate.go000066400000000000000000000104421356537375200161030ustar00rootroot00000000000000// Copyright 2015 Huan Du. All rights reserved. // Licensed under the MIT license that can be found in the LICENSE file. package xstrings import ( "bytes" "strings" "unicode/utf8" ) // Reverse a utf8 encoded string. func Reverse(str string) string { var size int tail := len(str) buf := make([]byte, tail) s := buf for len(str) > 0 { _, size = utf8.DecodeRuneInString(str) tail -= size s = append(s[:tail], []byte(str[:size])...) str = str[size:] } return string(buf) } // Slice a string by rune. // // Start must satisfy 0 <= start <= rune length. // // End can be positive, zero or negative. // If end >= 0, start and end must satisfy start <= end <= rune length. // If end < 0, it means slice to the end of string. // // Otherwise, Slice will panic as out of range. 
func Slice(str string, start, end int) string { var size, startPos, endPos int origin := str if start < 0 || end > len(str) || (end >= 0 && start > end) { panic("out of range") } if end >= 0 { end -= start } for start > 0 && len(str) > 0 { _, size = utf8.DecodeRuneInString(str) start-- startPos += size str = str[size:] } if end < 0 { return origin[startPos:] } endPos = startPos for end > 0 && len(str) > 0 { _, size = utf8.DecodeRuneInString(str) end-- endPos += size str = str[size:] } if len(str) == 0 && (start > 0 || end > 0) { panic("out of range") } return origin[startPos:endPos] } // Partition splits a string by sep into three parts. // The return value is a slice of strings with head, match and tail. // // If str contains sep, for example "hello" and "l", Partition returns // "he", "l", "lo" // // If str doesn't contain sep, for example "hello" and "x", Partition returns // "hello", "", "" func Partition(str, sep string) (head, match, tail string) { index := strings.Index(str, sep) if index == -1 { head = str return } head = str[:index] match = str[index : index+len(sep)] tail = str[index+len(sep):] return } // LastPartition splits a string by last instance of sep into three parts. // The return value is a slice of strings with head, match and tail. // // If str contains sep, for example "hello" and "l", LastPartition returns // "hel", "l", "o" // // If str doesn't contain sep, for example "hello" and "x", LastPartition returns // "", "", "hello" func LastPartition(str, sep string) (head, match, tail string) { index := strings.LastIndex(str, sep) if index == -1 { tail = str return } head = str[:index] match = str[index : index+len(sep)] tail = str[index+len(sep):] return } // Insert src into dst at given rune index. // Index is counted by runes instead of bytes. // // If index is out of range of dst, panic with out of range. 
func Insert(dst, src string, index int) string { return Slice(dst, 0, index) + src + Slice(dst, index, -1) } // Scrub scrubs invalid utf8 bytes with repl string. // Adjacent invalid bytes are replaced only once. func Scrub(str, repl string) string { var buf *bytes.Buffer var r rune var size, pos int var hasError bool origin := str for len(str) > 0 { r, size = utf8.DecodeRuneInString(str) if r == utf8.RuneError { if !hasError { if buf == nil { buf = &bytes.Buffer{} } buf.WriteString(origin[:pos]) hasError = true } } else if hasError { hasError = false buf.WriteString(repl) origin = origin[pos:] pos = 0 } pos += size str = str[size:] } if buf != nil { buf.WriteString(origin) return buf.String() } // No invalid byte. return origin } // WordSplit splits a string into words. Returns a slice of words. // If there is no word in a string, return nil. // // Word is defined as a locale dependent string containing alphabetic characters, // which may also contain but not start with `'` and `-` characters. func WordSplit(str string) []string { var word string var words []string var r rune var size, pos int inWord := false for len(str) > 0 { r, size = utf8.DecodeRuneInString(str) switch { case isAlphabet(r): if !inWord { inWord = true word = str pos = 0 } case inWord && (r == '\'' || r == '-'): // Still in word. default: if inWord { inWord = false words = append(words, word[:pos]) } } pos += size str = str[size:] } if inWord { words = append(words, word[:pos]) } return words } xstrings-1.2.1/manipulate_test.go000066400000000000000000000112321356537375200171400ustar00rootroot00000000000000// Copyright 2015 Huan Du. All rights reserved. // Licensed under the MIT license that can be found in the LICENSE file. 
package xstrings import ( "strconv" "strings" "testing" ) func TestReverse(t *testing.T) { runTestCases(t, Reverse, _M{ "reverse string": "gnirts esrever", "中文如何?": "?何如文中", "中en文混~排怎样?a": "a?样怎排~混文ne中", }) } func TestSlice(t *testing.T) { runner := func(str string) (result string) { defer func() { if e := recover(); e != nil { result = e.(string) } }() strs := split(str) start, _ := strconv.ParseInt(strs[1], 10, 0) end, _ := strconv.ParseInt(strs[2], 10, 0) result = Slice(strs[0], int(start), int(end)) return } runTestCases(t, runner, _M{ sep("abcdefghijk", "3", "8"): "defgh", sep("来点中文如何?", "2", "7"): "中文如何?", sep("中en文混~排总是少不了的a", "2", "8"): "n文混~排总", sep("中en文混~排总是少不了的a", "0", "0"): "", sep("中en文混~排总是少不了的a", "14", "14"): "", sep("中en文混~排总是少不了的a", "5", "-1"): "~排总是少不了的a", sep("中en文混~排总是少不了的a", "14", "-1"): "", sep("let us slice out of range", "-3", "3"): "out of range", sep("超出范围哦", "2", "6"): "out of range", sep("don't do this", "3", "2"): "out of range", sep("千gan万de不piao要liang", "19", "19"): "out of range", }) } func TestPartition(t *testing.T) { runner := func(str string) string { input := strings.Split(str, separator) head, match, tail := Partition(input[0], input[1]) return sep(head, match, tail) } runTestCases(t, runner, _M{ sep("hello", "l"): sep("he", "l", "lo"), sep("中文总少不了", "少"): sep("中文总", "少", "不了"), sep("z这个zh英文混排hao不", "h英文"): sep("z这个z", "h英文", "混排hao不"), sep("边界tiao件zen能忘", "边界"): sep("", "边界", "tiao件zen能忘"), sep("尾巴ye别忘le", "忘le"): sep("尾巴ye别", "忘le", ""), sep("hello", "x"): sep("hello", "", ""), sep("不是晩香玉", "晚"): sep("不是晩香玉", "", ""), // Hint: 晩 is not 晚 :) sep("来ge混排ba", "e 混"): sep("来ge混排ba", "", ""), }) } func TestLastPartition(t *testing.T) { runner := func(str string) string { input := strings.Split(str, separator) head, match, tail := LastPartition(input[0], input[1]) return sep(head, match, tail) } runTestCases(t, runner, _M{ sep("hello", "l"): sep("hel", "l", "o"), sep("少量中文总少不了", "少"): sep("少量中文总", "少", "不了"), 
sep("z这个zh英文ch英文混排hao不", "h英文"): sep("z这个zh英文c", "h英文", "混排hao不"), sep("边界tiao件zen能忘边界", "边界"): sep("边界tiao件zen能忘", "边界", ""), sep("尾巴ye别忘le", "尾巴"): sep("", "尾巴", "ye别忘le"), sep("hello", "x"): sep("", "", "hello"), sep("不是晩香玉", "晚"): sep("", "", "不是晩香玉"), // Hint: 晩 is not 晚 :) sep("来ge混排ba", "e 混"): sep("", "", "来ge混排ba"), }) } func TestInsert(t *testing.T) { runner := func(str string) (result string) { defer func() { if e := recover(); e != nil { result = e.(string) } }() strs := split(str) index, _ := strconv.ParseInt(strs[2], 10, 0) result = Insert(strs[0], strs[1], int(index)) return } runTestCases(t, runner, _M{ sep("abcdefg", "hi", "3"): "abchidefg", sep("少量中文是必须的", "混pai", "4"): "少量中文混pai是必须的", sep("zh英文hun排", "~!", "5"): "zh英文h~!un排", sep("插在beginning", "我", "0"): "我插在beginning", sep("插在ending", "我", "8"): "插在ending我", sep("超tian出yuan边tu界po", "foo", "-1"): "out of range", sep("超tian出yuan边tu界po", "foo", "17"): "out of range", }) } func TestScrub(t *testing.T) { runner := func(str string) string { strs := split(str) return Scrub(strs[0], strs[1]) } runTestCases(t, runner, _M{ sep("ab\uFFFDcd\xFF\xCEefg\xFF\xFC\xFD\xFAhijk", "*"): "ab*cd*efg*hijk", sep("no错误です", "*"): "no错误です", sep("", "*"): "", }) } func TestWordSplit(t *testing.T) { runner := func(str string) string { return sep(WordSplit(str)...) } runTestCases(t, runner, _M{ "one word": sep("one", "word"), "一个字:把他给我拿下!": "", "it's a super-fancy one!!!a": sep("it's", "a", "super-fancy", "one", "a"), "a -b-c' 'd'e": sep("a", "b-c'", "d'e"), }) } xstrings-1.2.1/translate.go000066400000000000000000000275361356537375200157550ustar00rootroot00000000000000// Copyright 2015 Huan Du. All rights reserved. // Licensed under the MIT license that can be found in the LICENSE file. package xstrings import ( "bytes" "unicode" "unicode/utf8" ) type runeRangeMap struct { FromLo rune // Lower bound of range map. FromHi rune // An inclusive higher bound of range map. 
ToLo rune ToHi rune } type runeDict struct { Dict [unicode.MaxASCII + 1]rune } type runeMap map[rune]rune // Translator can translate string with pre-compiled from and to patterns. // If a from/to pattern pair needs to be used more than once, it's recommended // to create a Translator and reuse it. type Translator struct { quickDict *runeDict // A quick dictionary to look up rune by index. Only available for latin runes. runeMap runeMap // Rune map for translation. ranges []*runeRangeMap // Ranges of runes. mappedRune rune // If mappedRune >= 0, all matched runes are translated to the mappedRune. reverted bool // If to pattern is empty, all matched characters will be deleted. hasPattern bool } // NewTranslator creates new Translator through a from/to pattern pair. func NewTranslator(from, to string) *Translator { tr := &Translator{} if from == "" { return tr } reverted := from[0] == '^' deletion := len(to) == 0 if reverted { from = from[1:] } var fromStart, fromEnd, fromRangeStep rune var toStart, toEnd, toRangeStep rune var fromRangeSize, toRangeSize rune var singleRunes []rune // Update the to rune range. updateRange := func() { // No more rune to read in the to rune pattern. if toEnd == utf8.RuneError { return } if toRangeStep == 0 { to, toStart, toEnd, toRangeStep = nextRuneRange(to, toEnd) return } // Current range is not empty. Consume 1 rune from start. if toStart != toEnd { toStart += toRangeStep return } // No more rune. Repeat the last rune. if to == "" { toEnd = utf8.RuneError return } // Both start and end are used. Read two more runes from the to pattern. to, toStart, toEnd, toRangeStep = nextRuneRange(to, utf8.RuneError) } if deletion { toStart = utf8.RuneError toEnd = utf8.RuneError } else { // If from pattern is reverted, only the last rune in the to pattern will be used. 
if reverted { var size int for len(to) > 0 { toStart, size = utf8.DecodeRuneInString(to) to = to[size:] } toEnd = utf8.RuneError } else { to, toStart, toEnd, toRangeStep = nextRuneRange(to, utf8.RuneError) } } fromEnd = utf8.RuneError for len(from) > 0 { from, fromStart, fromEnd, fromRangeStep = nextRuneRange(from, fromEnd) // fromStart is a single character. Just map it with a rune in the to pattern. if fromRangeStep == 0 { singleRunes = tr.addRune(fromStart, toStart, singleRunes) updateRange() continue } for toEnd != utf8.RuneError && fromStart != fromEnd { // If mapped rune is a single character instead of a range, simply shift first // rune in the range. if toRangeStep == 0 { singleRunes = tr.addRune(fromStart, toStart, singleRunes) updateRange() fromStart += fromRangeStep continue } fromRangeSize = (fromEnd - fromStart) * fromRangeStep toRangeSize = (toEnd - toStart) * toRangeStep // Not enough runes in the to pattern. Need to read more. if fromRangeSize > toRangeSize { fromStart, toStart = tr.addRuneRange(fromStart, fromStart+toRangeSize*fromRangeStep, toStart, toEnd, singleRunes) fromStart += fromRangeStep updateRange() // Edge case: If fromRangeSize == toRangeSize + 1, the last fromStart value needs be considered // as a single rune. if fromStart == fromEnd { singleRunes = tr.addRune(fromStart, toStart, singleRunes) updateRange() } continue } fromStart, toStart = tr.addRuneRange(fromStart, fromEnd, toStart, toStart+fromRangeSize*toRangeStep, singleRunes) updateRange() break } if fromStart == fromEnd { fromEnd = utf8.RuneError continue } fromStart, toStart = tr.addRuneRange(fromStart, fromEnd, toStart, toStart, singleRunes) fromEnd = utf8.RuneError } if fromEnd != utf8.RuneError { singleRunes = tr.addRune(fromEnd, toStart, singleRunes) } tr.reverted = reverted tr.mappedRune = -1 tr.hasPattern = true // Translate RuneError only if in deletion or reverted mode. 
if deletion || reverted { tr.mappedRune = toStart } return tr } func (tr *Translator) addRune(from, to rune, singleRunes []rune) []rune { if from <= unicode.MaxASCII { if tr.quickDict == nil { tr.quickDict = &runeDict{} } tr.quickDict.Dict[from] = to } else { if tr.runeMap == nil { tr.runeMap = make(runeMap) } tr.runeMap[from] = to } singleRunes = append(singleRunes, from) return singleRunes } func (tr *Translator) addRuneRange(fromLo, fromHi, toLo, toHi rune, singleRunes []rune) (rune, rune) { var r rune var rrm *runeRangeMap if fromLo < fromHi { rrm = &runeRangeMap{ FromLo: fromLo, FromHi: fromHi, ToLo: toLo, ToHi: toHi, } } else { rrm = &runeRangeMap{ FromLo: fromHi, FromHi: fromLo, ToLo: toHi, ToHi: toLo, } } // If there is any single rune conflicts with this rune range, clear single rune record. for _, r = range singleRunes { if rrm.FromLo <= r && r <= rrm.FromHi { if r <= unicode.MaxASCII { tr.quickDict.Dict[r] = 0 } else { delete(tr.runeMap, r) } } } tr.ranges = append(tr.ranges, rrm) return fromHi, toHi } func nextRuneRange(str string, last rune) (remaining string, start, end rune, rangeStep rune) { var r rune var size int remaining = str escaping := false isRange := false for len(remaining) > 0 { r, size = utf8.DecodeRuneInString(remaining) remaining = remaining[size:] // Parse special characters. if !escaping { if r == '\\' { escaping = true continue } if r == '-' { // Ignore slash at beginning of string. if last == utf8.RuneError { continue } start = last isRange = true continue } } escaping = false if last != utf8.RuneError { // This is a range which start and end are the same. // Considier it as a normal character. if isRange && last == r { isRange = false continue } start = last end = r if isRange { if start < end { rangeStep = 1 } else { rangeStep = -1 } } return } last = r } start = last end = utf8.RuneError return } // Translate str with a from/to pattern pair. // // See comment in Translate function for usage and samples. 
func (tr *Translator) Translate(str string) string { if !tr.hasPattern || str == "" { return str } var r rune var size int var needTr bool orig := str var output *bytes.Buffer for len(str) > 0 { r, size = utf8.DecodeRuneInString(str) r, needTr = tr.TranslateRune(r) if needTr && output == nil { output = allocBuffer(orig, str) } if r != utf8.RuneError && output != nil { output.WriteRune(r) } str = str[size:] } // No character is translated. if output == nil { return orig } return output.String() } // TranslateRune return translated rune and true if r matches the from pattern. // If r doesn't match the pattern, original r is returned and translated is false. func (tr *Translator) TranslateRune(r rune) (result rune, translated bool) { switch { case tr.quickDict != nil: if r <= unicode.MaxASCII { result = tr.quickDict.Dict[r] if result != 0 { translated = true if tr.mappedRune >= 0 { result = tr.mappedRune } break } } fallthrough case tr.runeMap != nil: var ok bool if result, ok = tr.runeMap[r]; ok { translated = true if tr.mappedRune >= 0 { result = tr.mappedRune } break } fallthrough default: var rrm *runeRangeMap ranges := tr.ranges for i := len(ranges) - 1; i >= 0; i-- { rrm = ranges[i] if rrm.FromLo <= r && r <= rrm.FromHi { translated = true if tr.mappedRune >= 0 { result = tr.mappedRune break } if rrm.ToLo < rrm.ToHi { result = rrm.ToLo + r - rrm.FromLo } else if rrm.ToLo > rrm.ToHi { // ToHi can be smaller than ToLo if range is from higher to lower. result = rrm.ToLo - r + rrm.FromLo } else { result = rrm.ToLo } break } } } if tr.reverted { if !translated { result = tr.mappedRune } translated = !translated } if !translated { result = r } return } // HasPattern returns true if Translator has one pattern at least. func (tr *Translator) HasPattern() bool { return tr.hasPattern } // Translate str with the characters defined in from replaced by characters defined in to. // // From and to are patterns representing a set of characters. Pattern is defined as following. 
// // * Special characters // * '-' means a range of runes, e.g. // * "a-z" means all characters from 'a' to 'z' inclusive; // * "z-a" means all characters from 'z' to 'a' inclusive. // * '^' as first character means a set of all runes excepted listed, e.g. // * "^a-z" means all characters except 'a' to 'z' inclusive. // * '\' escapes special characters. // * Normal character represents itself, e.g. "abc" is a set including 'a', 'b' and 'c'. // // Translate will try to find a 1:1 mapping from from to to. // If to is smaller than from, last rune in to will be used to map "out of range" characters in from. // // Note that '^' only works in the from pattern. It will be considered as a normal character in the to pattern. // // If the to pattern is an empty string, Translate works exactly the same as Delete. // // Samples: // Translate("hello", "aeiou", "12345") => "h2ll4" // Translate("hello", "a-z", "A-Z") => "HELLO" // Translate("hello", "z-a", "a-z") => "svool" // Translate("hello", "aeiou", "*") => "h*ll*" // Translate("hello", "^l", "*") => "**ll*" // Translate("hello ^ world", `\^lo`, "*") => "he*** * w*r*d" func Translate(str, from, to string) string { tr := NewTranslator(from, to) return tr.Translate(str) } // Delete runes in str matching the pattern. // Pattern is defined in Translate function. // // Samples: // Delete("hello", "aeiou") => "hll" // Delete("hello", "a-k") => "llo" // Delete("hello", "^a-k") => "he" func Delete(str, pattern string) string { tr := NewTranslator(pattern, "") return tr.Translate(str) } // Count how many runes in str match the pattern. // Pattern is defined in Translate function. 
// // Samples: // Count("hello", "aeiou") => 3 // Count("hello", "a-k") => 3 // Count("hello", "^a-k") => 2 func Count(str, pattern string) int { if pattern == "" || str == "" { return 0 } var r rune var size int var matched bool tr := NewTranslator(pattern, "") cnt := 0 for len(str) > 0 { r, size = utf8.DecodeRuneInString(str) str = str[size:] if _, matched = tr.TranslateRune(r); matched { cnt++ } } return cnt } // Squeeze deletes adjacent repeated runes in str. // If pattern is not empty, only runes matching the pattern will be squeezed. // // Samples: // Squeeze("hello", "") => "helo" // Squeeze("hello", "m-z") => "hello" // Squeeze("hello world", " ") => "hello world" func Squeeze(str, pattern string) string { var last, r rune var size int var skipSqueeze, matched bool var tr *Translator var output *bytes.Buffer orig := str last = -1 if len(pattern) > 0 { tr = NewTranslator(pattern, "") } for len(str) > 0 { r, size = utf8.DecodeRuneInString(str) // Need to squeeze the str. if last == r && !skipSqueeze { if tr != nil { if _, matched = tr.TranslateRune(r); !matched { skipSqueeze = true } } if output == nil { output = allocBuffer(orig, str) } if skipSqueeze { output.WriteRune(r) } } else { if output != nil { output.WriteRune(r) } last = r skipSqueeze = false } str = str[size:] } if output == nil { return orig } return output.String() } xstrings-1.2.1/translate_test.go000066400000000000000000000055351356537375200170070ustar00rootroot00000000000000// Copyright 2015 Huan Du. All rights reserved. // Licensed under the MIT license that can be found in the LICENSE file. 
package xstrings import ( "fmt" "strings" "testing" ) func TestTranslate(t *testing.T) { runner := func(str string) string { input := strings.Split(str, separator) return Translate(input[0], input[1], input[2]) } runTestCases(t, runner, _M{ sep("hello", "aeiou", "12345"): "h2ll4", sep("hello", "aeiou", ""): "hll", sep("hello", "a-z", "A-Z"): "HELLO", sep("hello", "z-a", "a-z"): "svool", sep("hello", "aeiou", "*"): "h*ll*", sep("hello", "^l", "*"): "**ll*", sep("hello", "p-z", "*"): "hello", sep("hello ^ world", `\^lo`, "*"): "he*** * w*r*d", sep("中文字符测试", "文中谁敢试?", "123456"): "21字符测5", sep("中文字符测试", "^文中谁敢试?", "123456"): "中文666试", sep("中文字符测试", "字-试", "0-9"): "中90999", sep("h1e2l3l4o, w5o6r7l8d", "a-z,0-9", `A-Z\-a-czk-p`): "HbEcLzLkO- WlOmRnLoD", sep("h1e2l3l4o, w5o6r7l8d", "a-zoh-n", "b-zakt-z"): "t1f2x3x4k, x5k6s7x8e", sep("h1e2l3l4o, w5o6r7l8d", "helloa-zoh-n", "99999b-zakt-z"): "t1f2x3x4k, x5k6s7x8e", sep("hello", "e-", "p"): "hpllo", sep("hello", "-e-", "p"): "hpllo", sep("hello", "----e---", "p"): "hpllo", sep("hello", "^---e----", "p"): "peppp", sep("hel\uFFFDlo", "\uFFFD", "H"): "helHlo", sep("hel\uFFFDlo", "^\uFFFD", "H"): "HHHHH", sep("hel\uFFFDlo", "o-\uFFFDh", "H"): "HelHlH", }) } func TestDelete(t *testing.T) { runner := func(str string) string { input := strings.Split(str, separator) return Delete(input[0], input[1]) } runTestCases(t, runner, _M{ sep("hello", "aeiou"): "hll", sep("hello", "a-k"): "llo", sep("hello", "^a-k"): "he", sep("中文字符测试", "文中谁敢试?"): "字符测", }) } func TestCount(t *testing.T) { runner := func(str string) string { input := strings.Split(str, separator) return fmt.Sprint(Count(input[0], input[1])) } runTestCases(t, runner, _M{ sep("hello", "aeiou"): "2", sep("hello", "a-k"): "2", sep("hello", "^a-k"): "3", sep("中文字符测试", "文中谁敢试?"): "3", }) } func TestSqueeze(t *testing.T) { runner := func(str string) string { input := strings.Split(str, separator) return Squeeze(input[0], input[1]) } runTestCases(t, runner, _M{ sep("hello", ""): 
"helo", sep("hello world", ""): "helo world", sep("hello world", " "): "hello world", sep("hello world", " "): "hello world", sep("hello", "a-k"): "hello", sep("hello", "^a-k"): "helo", sep("hello", "^a-l"): "hello", sep("foooo baaaaar", "a"): "foooo bar", sep("打打打打个劫!!", ""): "打个劫!", sep("打打打打个劫!!", "打"): "打个劫!!", }) } xstrings-1.2.1/util_test.go000066400000000000000000000011531356537375200157570ustar00rootroot00000000000000// Copyright 2015 Huan Du. All rights reserved. // Licensed under the MIT license that can be found in the LICENSE file. package xstrings import ( "strings" "testing" ) type _M map[string]string const ( separator = " ¶ " ) func runTestCases(t *testing.T, converter func(string) string, cases map[string]string) { for k, v := range cases { s := converter(k) if s != v { t.Fatalf("case fails. [case:%v]\nshould => %#v\nactual => %#v", k, v, s) } } } func sep(strs ...string) string { return strings.Join(strs, separator) } func split(str string) []string { return strings.Split(str, separator) }