From f0886df0bb1e784616a60512d2621528f720d4b7 Mon Sep 17 00:00:00 2001 From: Huan Du Date: Sat, 11 Apr 2020 19:54:44 +0800 Subject: [PATCH] refs #1: update rule to convert "Bld4Floor3rd" to "bld4_floor_3rd" --- convert.go | 344 +++++++++++++++++++++++++++++++++++++----------- convert_test.go | 13 +- 2 files changed, 276 insertions(+), 81 deletions(-) diff --git a/convert.go b/convert.go index 3686780..4ba8986 100644 --- a/convert.go +++ b/convert.go @@ -75,15 +75,16 @@ func ToCamelCase(str string) string { // snake case format. // // Some samples. -// "FirstName" => "first_name" -// "HTTPServer" => "http_server" -// "NoHTTPS" => "no_https" -// "GO_PATH" => "go_path" -// "GO PATH" => "go_path" // space is converted to underscore. -// "GO-PATH" => "go_path" // hyphen is converted to underscore. -// "HTTP2XX" => "http_2xx" // insert an underscore before a number and after an alphabet. -// "http2xx" => "http_2xx" -// "HTTP20xOK" => "http_20x_ok" +// "FirstName" => "first_name" +// "HTTPServer" => "http_server" +// "NoHTTPS" => "no_https" +// "GO_PATH" => "go_path" +// "GO PATH" => "go_path" // space is converted to underscore. +// "GO-PATH" => "go_path" // hyphen is converted to underscore. +// "http2xx" => "http_2xx" // insert an underscore before a number and after an alphabet. +// "HTTP20xOK" => "http_20x_ok" +// "Duration2m3s" => "duration_2m3s" +// "Bld4Floor3rd" => "bld4_floor_3rd" func ToSnakeCase(str string) string { return camelCaseToLowerCase(str, '_') } @@ -92,15 +93,16 @@ func ToSnakeCase(str string) string { // kebab case format. // // Some samples. -// "FirstName" => "first-name" -// "HTTPServer" => "http-server" -// "NoHTTPS" => "no-https" -// "GO_PATH" => "go-path" -// "GO PATH" => "go-path" // space is converted to '-'. -// "GO-PATH" => "go-path" // hyphen is converted to '-'. -// "HTTP2XX" => "http-2xx" // insert a '-' before a number and after an alphabet. -// "http2xx" => "http-2xx" -// "HTTP20xOK" => "http-20x-ok" +// "FirstName" => "first-name" +// "HTTPServer" => "http-server" +// "NoHTTPS" => "no-https" +// "GO_PATH" => "go-path" +// "GO PATH" => "go-path" // space is converted to '-'. +// "GO-PATH" => "go-path" // hyphen is converted to '-'. +// "http2xx" => "http-2xx" // insert an underscore before a number and after an alphabet. +// "HTTP20xOK" => "http-20x-ok" +// "Duration2m3s" => "duration-2m3s" +// "Bld4Floor3rd" => "bld4-floor-3rd" func ToKebabCase(str string) string { return camelCaseToLowerCase(str, '-') } @@ -111,102 +113,288 @@ func camelCaseToLowerCase(str string, connector rune) string { } buf := &bytes.Buffer{} - var prev, r0, r1 rune - var size int + wt, word, remaining := nextWord(str) - r0 = connector + for len(remaining) > 0 { + if wt != connectorWord { + toLower(buf, wt, word, connector) + } - for len(str) > 0 { - prev = r0 - r0, size = utf8.DecodeRuneInString(str) - str = str[size:] + prev := wt + last := word + wt, word, remaining = nextWord(remaining) - switch { - case r0 == utf8.RuneError: - buf.WriteRune(r0) + switch prev { + case numberWord: + for wt == alphabetWord || wt == numberWord { + toLower(buf, wt, word, connector) + wt, word, remaining = nextWord(remaining) + } - case unicode.IsUpper(r0): - if prev != connector && !unicode.IsNumber(prev) { + if wt != invalidWord && wt != punctWord { buf.WriteRune(connector) } - buf.WriteRune(unicode.ToLower(r0)) + case connectorWord: + toLower(buf, prev, last, connector) + + case punctWord: + // nothing. + + default: + if wt != numberWord { + if wt != connectorWord && wt != punctWord { + buf.WriteRune(connector) + } + + break + } - if len(str) == 0 { + if len(remaining) == 0 { + toLower(buf, numberWord, word, connector) break } - r0, size = utf8.DecodeRuneInString(str) - str = str[size:] + last := word + wt, word, remaining = nextWord(remaining) + + // consider number as a part of previous word. + // e.g. "Bld4Floor" => "bld4_floor" + if wt != alphabetWord { + toLower(buf, numberWord, last, connector) + + if wt != connectorWord && wt != punctWord { + buf.WriteRune(connector) + } - if !unicode.IsUpper(r0) { - buf.WriteRune(r0) break } - // find next non-upper-case character and insert connector properly. - // it's designed to convert `HTTPServer` to `http_server`. - // if there are more than 2 adjacent upper case characters in a word, - // treat them as an abbreviation plus a normal word. - for len(str) > 0 { - r1 = r0 - r0, size = utf8.DecodeRuneInString(str) - str = str[size:] - - if r0 == utf8.RuneError { - buf.WriteRune(unicode.ToLower(r1)) - buf.WriteRune(r0) + // if there are some lower case letters following a number, + // add connector before the number. + // e.g. "HTTP2xx" => "http_2xx" + buf.WriteRune(connector) + toLower(buf, numberWord, last, connector) + + for wt == alphabetWord || wt == numberWord { + toLower(buf, wt, word, connector) + wt, word, remaining = nextWord(remaining) + } + + if wt != invalidWord && wt != connectorWord && wt != punctWord { + buf.WriteRune(connector) + } + } + } + + toLower(buf, wt, word, connector) + return buf.String() +} + +func isConnector(r rune) bool { + return r == '-' || r == '_' || unicode.IsSpace(r) +} + +type wordType int + +const ( + invalidWord wordType = iota + numberWord + upperCaseWord + alphabetWord + connectorWord + punctWord + otherWord +) + +func nextWord(str string) (wt wordType, word, remaining string) { + if len(str) == 0 { + return + } + + var offset int + remaining = str + r, size := nextValidRune(remaining, utf8.RuneError) + offset += size + + if r == utf8.RuneError { + wt = invalidWord + word = str[:offset] + remaining = str[offset:] + return + } + + switch { + case isConnector(r): + wt = connectorWord + remaining = remaining[size:] + + for len(remaining) > 0 { + r, size = nextValidRune(remaining, r) + + if !isConnector(r) { + break + } + + offset += size + remaining = remaining[size:] + } + + case unicode.IsPunct(r): + wt = punctWord + remaining = remaining[size:] + + for len(remaining) > 0 { + r, size = nextValidRune(remaining, r) + + if !unicode.IsPunct(r) { + break + } + + offset += size + remaining = remaining[size:] + } + + case unicode.IsUpper(r): + wt = upperCaseWord + remaining = remaining[size:] + + if len(remaining) == 0 { + break + } + + r, size = nextValidRune(remaining, r) + + switch { + case unicode.IsUpper(r): + prevSize := size + offset += size + remaining = remaining[size:] + + for len(remaining) > 0 { + r, size = nextValidRune(remaining, r) + + if !unicode.IsUpper(r) { break } - if !unicode.IsUpper(r0) { - if isConnector(r0) { - r0 = connector - - buf.WriteRune(unicode.ToLower(r1)) - } else if unicode.IsNumber(r0) { - // treat a number as an upper case rune - // so that both `http2xx` and `HTTP2XX` can be converted to `http_2xx`. - buf.WriteRune(unicode.ToLower(r1)) - buf.WriteRune(connector) - buf.WriteRune(r0) - } else { - buf.WriteRune(connector) - buf.WriteRune(unicode.ToLower(r1)) - buf.WriteRune(r0) - } + prevSize = size + offset += size + remaining = remaining[size:] + } + + // it's a bit complex when dealing with a case like "HTTPStatus". + // it's expected to be splitted into "HTTP" and "Status". + // Therefore "S" should be in remaining instead of word. + if len(remaining) > 0 && isAlphabet(r) { + offset -= prevSize + remaining = str[offset:] + } + + case isAlphabet(r): + offset += size + remaining = remaining[size:] + for len(remaining) > 0 { + r, size = nextValidRune(remaining, r) + + if !isAlphabet(r) || unicode.IsUpper(r) { break } - buf.WriteRune(unicode.ToLower(r1)) + offset += size + remaining = remaining[size:] } + } + + case isAlphabet(r): + wt = alphabetWord + remaining = remaining[size:] + + for len(remaining) > 0 { + r, size = nextValidRune(remaining, r) - if len(str) == 0 || r0 == connector { - buf.WriteRune(unicode.ToLower(r0)) + if !isAlphabet(r) || unicode.IsUpper(r) { + break } - case unicode.IsNumber(r0): - if prev != connector && !unicode.IsNumber(prev) { - buf.WriteRune(connector) + offset += size + remaining = remaining[size:] + } + + case unicode.IsNumber(r): + wt = numberWord + remaining = remaining[size:] + + for len(remaining) > 0 { + r, size = nextValidRune(remaining, r) + + if !unicode.IsNumber(r) { + break } - buf.WriteRune(r0) + offset += size + remaining = remaining[size:] + } - default: - if isConnector(r0) { - r0 = connector + default: + wt = otherWord + remaining = remaining[size:] + + for len(remaining) > 0 { + r, size = nextValidRune(remaining, r) + + if size == 0 || isConnector(r) || isAlphabet(r) || unicode.IsNumber(r) || unicode.IsPunct(r) { + break } - buf.WriteRune(r0) + offset += size + remaining = remaining[size:] } } - return buf.String() + word = str[:offset] + return } -func isConnector(r rune) bool { - return r == '-' || r == '_' || unicode.IsSpace(r) +func nextValidRune(str string, prev rune) (r rune, size int) { + var sz int + + for len(str) > 0 { + r, sz = utf8.DecodeRuneInString(str) + size += sz + + if r != utf8.RuneError { + return + } + + str = str[sz:] + } + + r = prev + return +} + +func toLower(buf *bytes.Buffer, wt wordType, str string, connector rune) { + buf.Grow(buf.Len() + len(str)) + + if wt != upperCaseWord && wt != connectorWord { + buf.WriteString(str) + return + } + + for len(str) > 0 { + r, size := utf8.DecodeRuneInString(str) + str = str[size:] + + if isConnector(r) { + buf.WriteRune(connector) + } else if unicode.IsUpper(r) { + buf.WriteRune(unicode.ToLower(r)) + } else { + buf.WriteRune(r) + } + } } // SwapCase will swap characters case from upper to lower or lower to upper. diff --git a/convert_test.go b/convert_test.go index 33222a1..5d19a61 100644 --- a/convert_test.go +++ b/convert_test.go @@ -23,18 +23,25 @@ func TestToSnakeCaseAndToKebabCase(t *testing.T) { "TW": "tw", "_C": "_c", "http2xx": "http_2xx", - "HTTP2XX": "http_2xx", + "HTTP2XX": "http2_xx", "HTTP20xOK": "http_20x_ok", - "HTTP20XStatus": "http_20x_status", + "HTTP20xStatus": "http_20x_status", "HTTP-20xStatus": "http_20x_status", "a": "a", + "Duration2m3s": "duration_2m3s", + "Bld4Floor3rd": "bld4_floor_3rd", + " _-_ ": "_____", + "a1b2c3d": "a_1b2c3d", + + "HTTP状态码404/502Error": "http_状态码404/502_error", + "中文(字符)": "中文(字符)", " sentence case ": "__sentence_case__", " Mixed-hyphen case _and SENTENCE_case and UPPER-case": "_mixed_hyphen_case__and_sentence_case_and_upper_case", "FROM CamelCase to snake/kebab-case": "from_camel_case_to_snake/kebab_case", "": "", - "Abc\uFFFDE\uFFFDf\uFFFDd\uFFFD2\uFFFD00Z\uFFFDZZ\uFFFDZZ": "abc\uFFFD_e\uFFFDf\uFFFDd\uFFFD_2\uFFFD_00z\uFFFD_zz\uFFFD_zz", + "Abc\uFFFDE\uFFFDf\uFFFDd\uFFFD2\uFFFD00z\uFFFDZZ\uFFFDZZ": "abc_\uFFFDe\uFFFDf\uFFFDd_\uFFFD2\uFFFD00z_\uFFFDzz\uFFFDzz", } runTestCases(t, ToSnakeCase, cases)