Skip to content

Commit

Permalink
feat: add stringx to lang pkg
Browse files Browse the repository at this point in the history
Change-Id: I9adfcab50c7d0526ab46a26ed4941cd5e0f1b0c2
  • Loading branch information
zhongkaixiang.kaka committed May 13, 2021
1 parent 6a6cce9 commit 88c8172
Show file tree
Hide file tree
Showing 11 changed files with 1,203 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,7 @@

# Dependency directories (remove the comment below to include it)
# vendor/

# IDE or Editor's config
.idea/

16 changes: 16 additions & 0 deletions lang/stringx/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# stringx

## Introduction
Extensions and helpers for common string operations.

## Features
- Transform (Camel/Snake Case, Reverse, Rotate, Shuffle, ...)
- Construction (Pad, Repeat, ...)
- Matching (IsBlank, IsEmpty, IsAlphanumeric, IsNumeric, ...)







212 changes: 212 additions & 0 deletions lang/stringx/case.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
// Copyright 2021 ByteDance Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package stringx

import (
"strings"
"unicode"
"unicode/utf8"
)

// ToCamelCase Converts all the delimiter separated words in a String into camelCase,
// that is each word is made up of a titlecase character and then a series of lowercase characters.
// See https://en.wikipedia.org/wiki/Camel_case
func ToCamelCase(s string, isUpper bool, delimiters ...rune) string {
if s == "" {
return s
}

// Generate DelimiterSet
var delimiterSet map[rune]struct{}
if len(delimiters) == 0 {
delimiterSet = emptyRuneSet
} else {
delimiterSet = newRuneSet(delimiters...)
}

sb := strings.Builder{}
sb.Grow(len(s) / 2)
var r0 rune
var size int

restStr := s
// try to capitalize first none delimiter character
for len(restStr) > 0 {
r0, size = utf8.DecodeRuneInString(restStr)
restStr = restStr[size:]
if _, exists := delimiterSet[r0]; exists {
continue
}
if isUpper {
_, _ = sb.WriteRune(unicode.ToUpper(r0))
} else {
_, _ = sb.WriteRune(unicode.ToLower(r0))
}
break
}

// process rest string
capitalizeNext := false
for len(restStr) > 0 {
r0, size = utf8.DecodeRuneInString(restStr)
restStr = restStr[size:]
if _, exists := delimiterSet[r0]; exists {
capitalizeNext = true
continue
}
if capitalizeNext {
_, _ = sb.WriteRune(unicode.ToUpper(r0))
capitalizeNext = false
continue
}
_, _ = sb.WriteRune(unicode.ToLower(r0))
}
return sb.String()
}

// ToUpperCamelCase converts s to camel case with the first non-delimiter rune
// upper-cased. It is shorthand for ToCamelCase(s, true, delimiters...).
func ToUpperCamelCase(s string, delimiters ...rune) string {
return ToCamelCase(s, true, delimiters...)
}

// ToLowerCamelCase converts s to camel case with the first non-delimiter rune
// lower-cased. It is shorthand for ToCamelCase(s, false, delimiters...).
func ToLowerCamelCase(s string, delimiters ...rune) string {
return ToCamelCase(s, false, delimiters...)
}

const (
// underscoreRune is the separator rune that ToSnakeCase writes between words
// and in place of every delimiter.
underscoreRune = '_'
)

// Tags describing the rune that ToSnakeCase handled last. They are kept in a
// const group of their own so that iota starts at zero for
// snakeCaseRuneTagInit.
const (
// snakeCaseRuneTagInit: no rune has been processed yet.
snakeCaseRuneTagInit = iota
// snakeCaseRuneTagDelimiter: the previous rune was written as an underscore.
snakeCaseRuneTagDelimiter
// snakeCaseRuneTagUpper: the rune satisfies unicode.IsUpper.
snakeCaseRuneTagUpper
// snakeCaseRuneTagLower: the rune satisfies unicode.IsLower.
snakeCaseRuneTagLower

// snakeCaseRuneTagOthers: the rune is neither upper- nor lower-case
// (digits, punctuation, caseless scripts, ...).
snakeCaseRuneTagOthers
)

// ToSnakeCase converts s to snake case. Every rune is upper-cased when toUpper
// is true and lower-cased otherwise.
//
// Each underscore, each white-space rune (unicode.IsSpace) and each rune in
// delimitersToBeReplaced is written as a single '_'; consecutive separators
// are not collapsed. In addition, a '_' is inserted before an upper-case rune
// that starts a new word:
//
//   - an upper-case rune that follows a lower-case rune or any other
//     non-separator rune such as a digit
//     ("camelCase" -> "camel_case", "HTTP2XX" -> "http2_xx");
//   - the last rune of an upper-case run when the rune after it is lower-case
//     ("HTTPServer" -> "http_server").
//
// A lower-case rune that is itself listed in delimitersToBeReplaced counts as
// a separator, not as the tail of a word, so it never triggers the second
// rule. An empty s is returned unchanged.
func ToSnakeCase(s string, toUpper bool, delimitersToBeReplaced ...rune) string {
	if s == "" {
		return s
	}

	// A nil set is fine: lookups on a nil map simply report "absent".
	var delimiterSet map[rune]struct{}
	if len(delimitersToBeReplaced) > 0 {
		delimiterSet = newRuneSet(delimitersToBeReplaced...)
	}

	transRuneFunc := unicode.ToLower
	if toUpper {
		transRuneFunc = unicode.ToUpper
	}

	// strings.Builder writes never return an error, so results are not checked.
	var sb strings.Builder
	sb.Grow(len(s))

	// prevRuneTag classifies the previously handled rune; it starts as "Init"
	// so the very first rune is never preceded by an underscore.
	prevRuneTag := snakeCaseRuneTagInit
	for restStr := s; len(restStr) > 0; {
		r0, size := utf8.DecodeRuneInString(restStr)
		restStr = restStr[size:]

		if needReplaceWithUnderscoreForSnakeCase(r0, delimiterSet) {
			sb.WriteRune(underscoreRune)
			prevRuneTag = snakeCaseRuneTagDelimiter
			continue
		}

		curRuneTag := getToSnakeTag(r0)
		if curRuneTag == snakeCaseRuneTagUpper {
			switch prevRuneTag {
			case snakeCaseRuneTagInit, snakeCaseRuneTagDelimiter:
				// Start of the string or right after a separator: the word
				// boundary is already there.
			case snakeCaseRuneTagUpper:
				// Inside an upper-case run (an acronym). Only the rune that
				// is followed by a lower-case letter begins the next word.
				// Peek without consuming so the following rune still goes
				// through the regular separator check on its own turn.
				if len(restStr) > 0 {
					next, _ := utf8.DecodeRuneInString(restStr)
					if unicode.IsLower(next) && !needReplaceWithUnderscoreForSnakeCase(next, delimiterSet) {
						sb.WriteRune(underscoreRune)
					}
				}
			default:
				// After a lower-case or other rune an upper-case rune is a
				// new word head.
				sb.WriteRune(underscoreRune)
			}
		}
		sb.WriteRune(transRuneFunc(r0))
		prevRuneTag = curRuneTag
	}
	return sb.String()
}

// getToSnakeTag classifies value for ToSnakeCase: upper-case letters map to
// snakeCaseRuneTagUpper, lower-case letters to snakeCaseRuneTagLower, and
// everything else to snakeCaseRuneTagOthers.
func getToSnakeTag(value rune) int {
	switch {
	case unicode.IsUpper(value):
		return snakeCaseRuneTagUpper
	case unicode.IsLower(value):
		return snakeCaseRuneTagLower
	default:
		return snakeCaseRuneTagOthers
	}
}

// needReplaceWithUnderscoreForSnakeCase reports whether r must be written as
// an underscore by ToSnakeCase: r is an underscore already, is white space
// according to unicode.IsSpace, or is a member of delimiterSet. delimiterSet
// may be nil, in which case no extra delimiters apply.
func needReplaceWithUnderscoreForSnakeCase(r rune, delimiterSet map[rune]struct{}) bool {
	if r == underscoreRune || unicode.IsSpace(r) {
		return true
	}
	// Reading from a nil map is safe and yields "not present".
	_, isDelimiter := delimiterSet[r]
	return isDelimiter
}

// ToUpperSnakeCase converts s to snake case with every rune upper-cased.
// It is shorthand for ToSnakeCase(s, true, delimitersToBeReplaced...).
func ToUpperSnakeCase(s string, delimitersToBeReplaced ...rune) string {
return ToSnakeCase(s, true, delimitersToBeReplaced...)
}

// ToLowerSnakeCase converts s to snake case with every rune lower-cased.
// It is shorthand for ToSnakeCase(s, false, delimitersToBeReplaced...).
func ToLowerSnakeCase(s string, delimitersToBeReplaced ...rune) string {
return ToSnakeCase(s, false, delimitersToBeReplaced...)
}
151 changes: 151 additions & 0 deletions lang/stringx/case_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
// Copyright 2021 ByteDance Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package stringx

import (
"strings"
"testing"

"github.com/stretchr/testify/assert"
)

// TestCamelCase runs table-driven checks against ToCamelCase and verifies that
// ToLowerCamelCase and ToUpperCamelCase return the same results as the
// corresponding ToCamelCase call.
func TestCamelCase(t *testing.T) {
	type camelCase struct {
		in     string
		delims []rune
		want   string
	}

	check := assert.New(t)

	// Expectations for isUpper == false.
	lowerCases := []camelCase{
		{in: "", delims: nil, want: ""},
		{in: "", delims: newTestDelimiters(' '), want: ""},
		{in: "A", delims: nil, want: "a"},
		{in: "a", delims: nil, want: "a"},
		{in: "some_words", delims: nil, want: "some_words"},
		{in: "someWordsAlpha", delims: nil, want: "somewordsalpha"},
		{in: "some_words", delims: newTestDelimiters('_'), want: "someWords"},
		{in: "Some_words", delims: newTestDelimiters('_'), want: "someWords"},
		{in: "http_-server", delims: newTestDelimiters('_', '-'), want: "httpServer"},
		{in: "no_https", delims: newTestDelimiters('_'), want: "noHttps"},
		{in: "_complex__case_", delims: nil, want: "_complex__case_"},
		{in: "_complex__case_", delims: newTestDelimiters('_'), want: "complexCase"},
	}
	for i := range lowerCases {
		tc := lowerCases[i]
		check.Equal(tc.want, ToCamelCase(tc.in, false, tc.delims...))
		check.Equal(tc.want, ToLowerCamelCase(tc.in, tc.delims...))
	}

	// Expectations for isUpper == true.
	upperCases := []camelCase{
		{in: "", delims: nil, want: ""},
		{in: "", delims: newTestDelimiters(' '), want: ""},
		{in: "a", delims: nil, want: "A"},
		{in: "A", delims: nil, want: "A"},
		{in: "some_words", delims: nil, want: "Some_words"},
		{in: "someWordsAlpha", delims: nil, want: "Somewordsalpha"},
		{in: "some_words", delims: newTestDelimiters('_'), want: "SomeWords"},
		{in: "http-_server", delims: newTestDelimiters('-', '_'), want: "HttpServer"},
		{in: "no-https", delims: newTestDelimiters('-'), want: "NoHttps"},
		{in: "_complex__case_", delims: nil, want: "_complex__case_"},
		{in: "_complex__case_", delims: newTestDelimiters('_'), want: "ComplexCase"},
	}
	for i := range upperCases {
		tc := upperCases[i]
		check.Equal(tc.want, ToCamelCase(tc.in, true, tc.delims...))
		check.Equal(tc.want, ToUpperCamelCase(tc.in, tc.delims...))
	}
}

// TestSnakeCase runs table-driven checks against ToSnakeCase and its
// ToLowerSnakeCase/ToUpperSnakeCase aliases. The table holds the lower-case
// expectations; the upper-case expectation for each row is the same string
// passed through strings.ToUpper.
func TestSnakeCase(t *testing.T) {
	type snakeCase struct {
		in     string
		delims []rune
		want   string
	}

	check := assert.New(t)

	cases := []snakeCase{
		{"", nil, ""},
		{" \t\n", nil, "______"},
		{"HTTPServer", nil, "http_server"},
		{"GinHttpServer", nil, "gin_http_server"},
		{"GinHTTPServer", nil, "gin_http_server"},
		{"GinHTTP2Server", nil, "gin_http2_server"},
		{"GinHTTPS2erver", nil, "gin_https2erver"},

		{"_camelCase", nil, "_camel_case"},
		{"NoHTTPS", nil, "no_https"},
		{"Wi_thF", nil, "wi_th_f"},
		{"_AnotherTES_TCaseP", nil, "_another_tes_t_case_p"},
		{"ALL", nil, "all"},
		{"_HELLO_WORLD_", nil, "_hello_world_"},
		{"HELLO_WORLD", nil, "hello_world"},
		{"HELLO____WORLD", nil, "hello____world"},
		{"CN", nil, "cn"},
		{"_C", nil, "_c"},
		{"http2xx", nil, "http2xx"},
		{"HTTP2XX", nil, "http2_xx"},
		{"HTTP20xOK", nil, "http20x_ok"},
		{"HTTP20xStatus", nil, "http20x_status"},
		{"HTTP-20xStatus", nil, "http-20x_status"},
		{"HTTP-20xStatus", newTestDelimiters('-'), "http_20x_status"},
		{"a", nil, "a"},
		{"Duration2m3s", nil, "duration2m3s"},
		{"Bld4Floor3rd", nil, "bld4_floor3rd"},
		{" _-_ ", nil, "__-__"},
		{" _-_ ", newTestDelimiters('-'), "_____"},
		{"a1b2c3d", nil, "a1b2c3d"},
		{"A//B%%2c", nil, "a//_b%%2c"},
		{"HTTP状态码404/502Error", nil, "http状态码404/502_error"},
		{"中文(字符)", nil, "中文(字符)"},
		{"混合ABCWords与123数字456", nil, "混合_abc_words与123数字456"},
		{" sentence case ", nil, "__sentence_case__"},

		{
			" Mixed-hyphen case _and SENTENCE_case and UPPER-case",
			newTestDelimiters('-'),
			"_mixed_hyphen_case__and_sentence_case_and_upper_case",
		},

		{"FROM CamelCase to snake/kebab-case", nil, "from_camel_case_to_snake/kebab-case"},
		{"FROM CamelCase to snake/kebab-case", newTestDelimiters('-'), "from_camel_case_to_snake/kebab_case"},
	}

	// Lower snake case: the table's expectations apply as written.
	for i := range cases {
		tc := cases[i]
		check.Equal(tc.want, ToSnakeCase(tc.in, false, tc.delims...))
		check.Equal(tc.want, ToLowerSnakeCase(tc.in, tc.delims...))
	}

	// Upper snake case: same rows, expectation upper-cased.
	for i := range cases {
		tc := cases[i]
		wantUpper := strings.ToUpper(tc.want)
		check.Equal(wantUpper, ToSnakeCase(tc.in, true, tc.delims...))
		check.Equal(wantUpper, ToUpperSnakeCase(tc.in, tc.delims...))
	}
}

// newTestDelimiters returns a freshly allocated, non-nil slice holding a copy
// of the given delimiter runes, so test cases never share a backing array with
// the caller's arguments.
func newTestDelimiters(delimiters ...rune) []rune {
	copied := make([]rune, len(delimiters))
	copy(copied, delimiters)
	return copied
}
Loading

0 comments on commit 88c8172

Please sign in to comment.