diff --git a/.gitignore b/.gitignore index 66fd13c9..8ca996c4 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,7 @@ # Dependency directories (remove the comment below to include it) # vendor/ + +# IDE or Editor's config +.idea/ + diff --git a/lang/stringx/README.md b/lang/stringx/README.md new file mode 100644 index 00000000..021bee77 --- /dev/null +++ b/lang/stringx/README.md @@ -0,0 +1,9 @@ +# stringx + +## Introduction +Extensions and helpers for string operations. + +## Features +- Transform (Reverse, Rotate, Shuffle, ...) +- Construction (Pad, Repeat, ...) +- Matching (IsAlpha, IsAlphanumeric, IsNumeric, ...) diff --git a/lang/stringx/doc.go b/lang/stringx/doc.go new file mode 100644 index 00000000..e0a7ce7d --- /dev/null +++ b/lang/stringx/doc.go @@ -0,0 +1,16 @@ +// Copyright 2021 ByteDance Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package stringx provides extensions for string operations. +package stringx diff --git a/lang/stringx/exmaple_test.go b/lang/stringx/exmaple_test.go new file mode 100644 index 00000000..75069a56 --- /dev/null +++ b/lang/stringx/exmaple_test.go @@ -0,0 +1,76 @@ +// Copyright 2021 ByteDance Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stringx + +import ( + "fmt" + "unicode/utf8" +) + +func Example_sub() { + fmt.Printf("Sub-[0:100]=%s\n", Sub("", 0, 100)) + fmt.Printf("Sub-facgbheidjk[3:9]=%s\n", Sub("facgbheidjk", 3, 9)) + fmt.Printf("Sub-facgbheidjk[-50:100]=%s\n", Sub("facgbheidjk", -50, 100)) + fmt.Printf("Sub-facgbheidjk[-3:length]=%s\n", Sub("facgbheidjk", -3, utf8.RuneCountInString("facgbheidjk"))) + fmt.Printf("Sub-facgbheidjk[-3:-1]=%s\n", Sub("facgbheidjk", -3, -1)) + fmt.Printf("Sub-zh英文hun排[2:5]=%s\n", Sub("zh英文hun排", 2, 5)) + fmt.Printf("Sub-zh英文hun排[2:-1]=%s\n", Sub("zh英文hun排", 2, -1)) + fmt.Printf("Sub-zh英文hun排[-100:-1]=%s\n", Sub("zh英文hun排", -100, -1)) + fmt.Printf("Sub-zh英文hun排[-100:-90]=%s\n", Sub("zh英文hun排", -100, -90)) + fmt.Printf("Sub-zh英文hun排[-10:-90]=%s\n", Sub("zh英文hun排", -10, -90)) + + // Output: + // Sub-[0:100]= + // Sub-facgbheidjk[3:9]=gbheid + // Sub-facgbheidjk[-50:100]=facgbheidjk + // Sub-facgbheidjk[-3:length]=djk + // Sub-facgbheidjk[-3:-1]=dj + // Sub-zh英文hun排[2:5]=英文h + // Sub-zh英文hun排[2:-1]=英文hun + // Sub-zh英文hun排[-100:-1]=zh英文hun + // Sub-zh英文hun排[-100:-90]= + // Sub-zh英文hun排[-10:-90]= +} + +func Example_substart() { + fmt.Printf("SubStart-[0:]=%s\n", SubStart("", 0)) + fmt.Printf("SubStart-[2:]=%s\n", SubStart("", 2)) + fmt.Printf("SubStart-facgbheidjk[3:]=%s\n", SubStart("facgbheidjk", 3)) + fmt.Printf("SubStart-facgbheidjk[-50:]=%s\n", SubStart("facgbheidjk", -50)) + fmt.Printf("SubStart-facgbheidjk[-3:]=%s\n", SubStart("facgbheidjk", -3)) + fmt.Printf("SubStart-zh英文hun排[3:]=%s\n", SubStart("zh英文hun排", 3)) + + // Output: + // 
SubStart-[0:]= + // SubStart-[2:]= + // SubStart-facgbheidjk[3:]=gbheidjk + // SubStart-facgbheidjk[-50:]=facgbheidjk + // SubStart-facgbheidjk[-3:]=djk + // SubStart-zh英文hun排[3:]=文hun排 +} + +func Example_pad() { + + fmt.Printf("PadLeft=[%s]\n", PadLeftSpace("abc", 7)) + fmt.Printf("PadLeft=[%s]\n", PadLeftChar("abc", 7, '-')) + fmt.Printf("PadCenter=[%s]\n", PadCenterChar("abc", 7, '-')) + fmt.Printf("PadCenter=[%s]\n", PadCenterChar("abcd", 7, '-')) + + // Output: + // PadLeft=[ abc] + // PadLeft=[----abc] + // PadCenter=[--abc--] + // PadCenter=[-abcd--] +} diff --git a/lang/stringx/is.go b/lang/stringx/is.go new file mode 100644 index 00000000..3dca401c --- /dev/null +++ b/lang/stringx/is.go @@ -0,0 +1,54 @@ +// Copyright 2021 ByteDance Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stringx + +import ( + "unicode" +) + +// IsAlpha reports whether every rune in s is a Unicode letter (unicode.IsLetter). +// It returns true for the empty string, because no rune fails the check. +func IsAlpha(s string) bool { + for _, v := range s { + if !unicode.IsLetter(v) { + return false + } + } + return true +} + +// IsAlphanumeric reports whether every rune in s is a Unicode letter or digit. +// It returns true for the empty string, because no rune fails the check. +func IsAlphanumeric(s string) bool { + for _, v := range s { + if isAlphanumeric(v) { + continue + } + return false + } + return true +} + +// IsNumeric reports whether every rune in s is a digit (unicode.IsDigit). A decimal point or a sign is not a digit, so a string containing one returns false; the empty string returns true. 
+func IsNumeric(s string) bool { + for _, v := range s { + if !unicode.IsDigit(v) { + return false + } + } + return true +} + +func isAlphanumeric(v rune) bool { + return unicode.IsDigit(v) || unicode.IsLetter(v) +} diff --git a/lang/stringx/is_test.go b/lang/stringx/is_test.go new file mode 100644 index 00000000..11ad386f --- /dev/null +++ b/lang/stringx/is_test.go @@ -0,0 +1,37 @@ +// Copyright 2021 ByteDance Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stringx + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestIs(t *testing.T) { + is := assert.New(t) + + is.False(IsNumeric(" bob ")) + is.True(IsNumeric("123")) + + is.False(IsAlpha("123")) + is.True(IsAlpha("Voa")) + is.True(IsAlpha("bròwn")) + + is.True(IsAlphanumeric("Voa")) + is.True(IsAlphanumeric("123")) + is.True(IsAlphanumeric("v123oa")) + is.False(IsAlphanumeric("v123oa,")) +} diff --git a/lang/stringx/stringx.go b/lang/stringx/stringx.go new file mode 100644 index 00000000..fcef1f9c --- /dev/null +++ b/lang/stringx/stringx.go @@ -0,0 +1,285 @@ +// Copyright 2021 ByteDance Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stringx + +import ( + "errors" + "math" + "strings" + "unicode/utf8" + + "github.com/bytedance/gopkg/internal/hack" + "github.com/bytedance/gopkg/lang/fastrand" +) + +// Error pre define +var ( + ErrDecodeRune = errors.New("error occurred on rune decoding") +) + +// PadLeftChar left pad a string with a specified character in a larger string (specified size). +// if the size is less than the param string, the param string is returned. +// note: size is unicode size. +func PadLeftChar(s string, size int, ch rune) string { + return padCharLeftOrRight(s, size, ch, true) +} + +// PadLeftChar left pad a string with space character(' ') in a larger string(specified size). +// if the size is less than the param string, the param string is returned. +// note: size is unicode size. +func PadLeftSpace(s string, size int) string { + return PadLeftChar(s, size, ' ') +} + +// PadRightChar right pad a string with a specified character in a larger string(specified size). +// if the size is less than the param string, the param string is returned. +// note: size is unicode size. +func PadRightChar(s string, size int, ch rune) string { + return padCharLeftOrRight(s, size, ch, false) +} + +// PadLeftChar right pad a string with space character(' ') in a large string(specified size). +// if the size is less than the param string, the param string is returned. +// note: size is unicode size. 
// PadRightSpace right pads s with ' ' until it is size runes long.
// It behaves exactly like PadRightChar with a space as the pad character.
func PadRightSpace(s string, size int) string {
	return padCharLeftOrRight(s, size, ' ', false)
}

// PadCenterChar centers s in a string of size runes, filling both sides with ch.
// When the number of pad runes is odd the extra one goes on the right, e.g.
// PadCenterChar("abcd", 7, '-') == "-abcd--". s is returned unchanged when
// size is not positive or when s already has at least size runes.
// Note: size counts runes, not bytes.
func PadCenterChar(s string, size int, ch rune) string {
	if size <= 0 {
		return s
	}
	pads := size - utf8.RuneCountInString(s)
	if pads <= 0 {
		return s
	}

	leftPads := pads / 2
	rightPads := pads - leftPads
	// RepeatChar returns "" for a zero count, so a single concatenation
	// covers the case where nothing is added on the left.
	return RepeatChar(ch, leftPads) + s + RepeatChar(ch, rightPads)
}

// PadCenterSpace centers s in a string of size runes, filling both sides with
// space characters (' '). See PadCenterChar for how odd padding is split.
// Note: size counts runes, not bytes.
func PadCenterSpace(s string, size int) string {
	return PadCenterChar(s, size, ' ')
}

// padCharLeftOrRight pads s with ch up to size runes, on the left when isLeft
// is true and on the right otherwise. It returns s unchanged when no padding
// is needed.
func padCharLeftOrRight(s string, size int, ch rune, isLeft bool) string {
	if size <= 0 {
		return s
	}
	pads := size - utf8.RuneCountInString(s)
	switch {
	case pads <= 0:
		return s
	case isLeft:
		return padRawLeftChar(s, ch, pads)
	default:
		return padRawRightChar(s, ch, pads)
	}
}

// padRawLeftChar prepends padSize copies of ch to s without any size checks.
func padRawLeftChar(s string, ch rune, padSize int) string {
	return RepeatChar(ch, padSize) + s
}

// padRawRightChar appends padSize copies of ch to s without any size checks.
func padRawRightChar(s string, ch rune, padSize int) string {
	return s + RepeatChar(ch, padSize)
}

// RepeatChar returns a string made of ch repeated repeat times.
// It returns "" when repeat is zero or negative.
func RepeatChar(ch rune, repeat int) string {
	if repeat <= 0 {
		return ""
	}
	// strings.Repeat sizes its buffer from the encoded length of ch, so
	// multi-byte runes are allocated for once instead of growing repeatedly.
	return strings.Repeat(string(ch), repeat)
}

// RemoveChar removes all occurrences of a specified character from the string.
+func RemoveChar(s string, rmVal rune) string { + if s == "" { + return s + } + sb := strings.Builder{} + sb.Grow(len(s) / 2) + + for _, v := range s { + if v != rmVal { + sb.WriteRune(v) + } + } + return sb.String() +} + +// RemoveString removes all occurrences of a substring from the string. +func RemoveString(s, rmStr string) string { + if s == "" || rmStr == "" { + return s + } + return strings.ReplaceAll(s, rmStr, "") +} + +// Rotate rotates(circular shift) a string of shift characters. +func Rotate(s string, shift int) string { + if shift == 0 { + return s + } + sLen := len(s) + if sLen == 0 { + return s + } + + shiftMod := shift % sLen + if shiftMod == 0 { + return s + } + + offset := -(shiftMod) + sb := strings.Builder{} + sb.Grow(sLen) + _, _ = sb.WriteString(SubStart(s, offset)) + _, _ = sb.WriteString(Sub(s, 0, offset)) + return sb.String() +} + +// Sub returns substring from specified string avoiding panics with index start and end. +// start, end are based on unicode(utf8) count +func Sub(s string, start, end int) string { + return sub(s, start, end) +} + +// SubStart returns substring from specified string avoiding panics with start. 
+// start, end are based on unicode(utf8) count +func SubStart(s string, start int) string { + return sub(s, start, math.MaxInt64) +} + +func sub(s string, start, end int) string { + if s == "" { + return "" + } + + unicodeLen := utf8.RuneCountInString(s) + // end + if end < 0 { + end += unicodeLen + } + if end > unicodeLen { + end = unicodeLen + } + // start + if start < 0 { + start += unicodeLen + } + if start > end { + return "" + } + + // start <= end + if start < 0 { + start = 0 + } + if end < 0 { + end = 0 + } + if start == 0 && end == unicodeLen { + return s + } + + sb := strings.Builder{} + sb.Grow(end - start) + runeIndex := 0 + for _, v := range s { + if runeIndex >= end { + break + } + if runeIndex >= start { + sb.WriteRune(v) + } + runeIndex++ + } + return sb.String() +} + +// Reverse reverses a string +func Reverse(s string) string { + result, _ := ReverseE(s) + return result +} + +// ReverseE reverses a string with error status returned +func ReverseE(s string) (string, error) { + if s == "" { + return s, nil + } + src := hack.StringToBytes(s) + dst := make([]byte, len(s)) + srcIndex := len(s) + dstIndex := 0 + for srcIndex > 0 { + r, n := utf8.DecodeLastRune(src[:srcIndex]) + if r == utf8.RuneError { + return hack.BytesToString(dst), ErrDecodeRune + } + utf8.EncodeRune(dst[dstIndex:], r) + srcIndex -= n + dstIndex += n + } + return hack.BytesToString(dst), nil +} + +// Shuffle shuffles runes in a string and returns. +func Shuffle(s string) string { + if s == "" { + return s + } + runes := []rune(s) + index := 0 + for i := len(runes) - 1; i > 0; i-- { + index = fastrand.Intn(i + 1) + if i != index { + runes[i], runes[index] = runes[index], runes[i] + } + } + return string(runes) +} + +// ContainsAnySubstrings returns whether s contains any of substring in slice. 
+func ContainsAnySubstrings(s string, subs []string) bool { + for _, v := range subs { + if strings.Contains(s, v) { + return true + } + } + return false +} diff --git a/lang/stringx/stringx_test.go b/lang/stringx/stringx_test.go new file mode 100644 index 00000000..1819a459 --- /dev/null +++ b/lang/stringx/stringx_test.go @@ -0,0 +1,230 @@ +// Copyright 2021 ByteDance Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package stringx + +import ( + "sort" + "strings" + "testing" + "unicode/utf8" + + "github.com/stretchr/testify/assert" +) + +func TestPad(t *testing.T) { + type testData struct { + input string + padChar rune + size int + leftExpected string + leftExpectedSpace string + + rightExpected string + rightExpectedSpace string + + centerExpected string + centerExpectedSpace string + } + + testCases := []testData{ + { + "", '-', 4, + "----", " ", + "----", " ", + "----", " ", + }, + { + "abc", '-', 0, + "abc", "abc", + "abc", "abc", + "abc", "abc", + }, + { + "abc", '-', 2, + "abc", "abc", + "abc", "abc", + "abc", "abc", + }, + { + "abc", '-', 4, + "-abc", " abc", + "abc-", "abc ", + "abc-", "abc ", + }, + { + "abc", '-', 5, + "--abc", " abc", + "abc--", "abc ", + "-abc-", " abc ", + }, + { + "abc", '-', 6, + "---abc", " abc", + "abc---", "abc ", + "-abc--", " abc ", + }, + { + "abc", '-', 7, + "----abc", " abc", + "abc----", "abc ", + "--abc--", " abc ", + }, + + { + "abcd", '-', 7, + "---abcd", " abcd", + "abcd---", "abcd ", + "-abcd--", " 
abcd ", + }, + } + + is := assert.New(t) + for _, testCase := range testCases { + is.Equal(testCase.leftExpected, PadLeftChar(testCase.input, testCase.size, testCase.padChar)) + is.Equal(testCase.leftExpectedSpace, PadLeftSpace(testCase.input, testCase.size)) + + is.Equal(testCase.rightExpected, PadRightChar(testCase.input, testCase.size, testCase.padChar)) + is.Equal(testCase.rightExpectedSpace, PadRightSpace(testCase.input, testCase.size)) + + is.Equal(testCase.centerExpected, PadCenterChar(testCase.input, testCase.size, testCase.padChar)) + is.Equal(testCase.centerExpectedSpace, PadCenterSpace(testCase.input, testCase.size)) + } +} + +func TestRemove(t *testing.T) { + is := assert.New(t) + is.Equal("", RemoveChar("", 'h')) + is.Equal("z英文un排", RemoveChar("zh英文hunh排", 'h')) + is.Equal("zh英hun排", RemoveChar("zh英文hun文排", '文')) + + is.Equal("", RemoveString("", "文hun")) + is.Equal("zh英文hun排", RemoveString("zh英文hun排", "")) + is.Equal("zh英排", RemoveString("zh英文hun排", "文hun")) + is.Equal("zh英文hun排", RemoveString("zh英文hun排", "")) +} + +func TestRepeat(t *testing.T) { + is := assert.New(t) + is.Equal("", RepeatChar('-', 0)) + is.Equal("----", RepeatChar('-', 4)) + is.Equal(" ", RepeatChar(' ', 3)) +} + +func TestRotate(t *testing.T) { + is := assert.New(t) + + is.Equal("", Rotate("", 2)) + + is.Equal("abc", Rotate("abc", 0)) + is.Equal("abc", Rotate("abc", 3)) + is.Equal("abc", Rotate("abc", 6)) + + is.Equal("cab", Rotate("abc", 1)) + is.Equal("bca", Rotate("abc", -1)) +} + +func TestReverse(t *testing.T) { + tests := []struct { + input string + expected string + }{ + {"", ""}, + {"abc", "cba"}, + {"a", "a"}, + {"çınar", "ranıç"}, + {" yağmur", "rumğay "}, + {"επαγγελματίες", "ςείταμλεγγαπε"}, + } + for _, test := range tests { + output := Reverse(test.input) + assert.Equalf(t, test.expected, output, "Test case %s is not successful\n", test.input) + } + + assert.Equal(t, Reverse(""), "") + assert.Equal(t, Reverse("X"), "X") + assert.Equal(t, Reverse("\u0301b"), 
"b\u0301") + assert.Equal(t, Reverse("😎⚽"), "⚽😎") + assert.Equal(t, Reverse("Les Mise\u0301rables"), "selbar\u0301esiM seL") + assert.Equal(t, Reverse("ab\u0301cde"), "edc\u0301ba") + assert.Equal(t, Reverse("The quick bròwn 狐 jumped over the lazy 犬"), "犬 yzal eht revo depmuj 狐 nwòrb kciuq ehT") + _, err := ReverseE(string([]byte{128, 128, 128, 128, 0})) + assert.Equal(t, ErrDecodeRune, err) +} + +func TestSub(t *testing.T) { + type testData struct { + input string + start int + end int + expected string + } + + newTestCase := func(intput string, start, end int, expected string) testData { + return testData{ + input: intput, + start: start, + end: end, + expected: expected, + } + } + + testCases := []testData{ + newTestCase("", 0, 100, ""), + newTestCase("facgbheidjk", 3, 9, "gbheid"), + newTestCase("facgbheidjk", -50, 100, "facgbheidjk"), + newTestCase("facgbheidjk", -3, utf8.RuneCountInString("facgbheidjk"), "djk"), + newTestCase("facgbheidjk", -3, -1, "dj"), + newTestCase("zh英文hun排", 2, 5, "英文h"), + newTestCase("zh英文hun排", 2, -1, "英文hun"), + newTestCase("zh英文hun排", -100, -1, "zh英文hun"), + newTestCase("zh英文hun排", -100, -90, ""), + newTestCase("zh英文hun排", -10, -90, ""), + } + + is := assert.New(t) + for _, testCase := range testCases { + is.Equal(testCase.expected, Sub(testCase.input, testCase.start, testCase.end)) + } +} + +func TestContainsAnySubstrings(t *testing.T) { + is := assert.New(t) + is.True(ContainsAnySubstrings("abcdefg", []string{"a", "b"})) + is.True(ContainsAnySubstrings("abcdefg", []string{"a", "z"})) + is.False(ContainsAnySubstrings("abcdefg", []string{"ac", "z"})) + is.False(ContainsAnySubstrings("abcdefg", []string{"x", "z"})) +} + +func TestShuffle(t *testing.T) { + is := assert.New(t) + + shuffleAndSort := func(str string) string { + s := Shuffle(str) + slice := sort.StringSlice(strings.Split(s, "")) + slice.Sort() + return strings.Join(slice, "") + } + + strMap := map[string]string{ + "": "", + "facgbheidjk": "abcdefghijk", + "尝试中文": "中尝文试", 
+ "zh英文hun排": "hhnuz排文英", + } + for input, expected := range strMap { + actual := shuffleAndSort(input) + is.Equal(expected, actual) + } +}