From 08f14d2b084bd0de772fd1ea01ba9705cad10c9e Mon Sep 17 00:00:00 2001 From: dudaodong Date: Fri, 8 Nov 2024 14:11:25 +0800 Subject: [PATCH] feat: add ExtractContent : --- docs/api/packages/strutil.md | 31 ++++++++++++ docs/en/api/packages/strutil.md | 34 ++++++++++++- strutil/string.go | 21 +++++++++ strutil/string_example_test.go | 12 +++++ strutil/string_test.go | 84 +++++++++++++++++++++++++++++++++ 5 files changed, 181 insertions(+), 1 deletion(-) diff --git a/docs/api/packages/strutil.md b/docs/api/packages/strutil.md index 3e214e3c..85023fcc 100644 --- a/docs/api/packages/strutil.md +++ b/docs/api/packages/strutil.md @@ -68,6 +68,7 @@ import ( - [Rotate](#Rotate) - [TemplateReplace](#TemplateReplace) - [RegexMatchAllGroups](#RegexMatchAllGroups) +- [ExtractContent](#ExtractContent)
@@ -1728,4 +1729,34 @@ func main() { // [john.doe@example.com john.doe example com] // [jane.doe@example.com jane.doe example com] } +``` + +### ExtractContent + +

提取两个标记之间的内容。

+ +函数签名: + +```go +func ExtractContent(s, start, end string) []string +``` + +示例:[Run](todo) + +```go +import ( + "fmt" + "github.com/duke-git/lancet/v2/strutil" +) + +func main() { + html := `<span>content1</span>aa<span>content2</span>bb<span>content1</span>` + + result := strutil.ExtractContent(html, "<span>", "</span>") + + fmt.Println(result) + + // Output: + // [content1 content2 content1] +} ``` \ No newline at end of file diff --git a/docs/en/api/packages/strutil.md b/docs/en/api/packages/strutil.md index c0154158..d8c905a9 100644 --- a/docs/en/api/packages/strutil.md +++ b/docs/en/api/packages/strutil.md @@ -68,6 +68,8 @@ import ( - [Rotate](#Rotate) - [TemplateReplace](#TemplateReplace) - [RegexMatchAllGroups](#RegexMatchAllGroups) +- [ExtractContent](#ExtractContent) +
@@ -1708,7 +1710,7 @@ func main() { func RegexMatchAllGroups(pattern, str string) [][]string ``` -example:[Run](https://go.dev/play/p/JZiu0RXpgN-) +Example:[Run](https://go.dev/play/p/JZiu0RXpgN-) ```go import ( @@ -1729,4 +1731,34 @@ func main() { // [john.doe@example.com john.doe example com] // [jane.doe@example.com jane.doe example com] } +``` + +### ExtractContent + +

Extracts the content between the start and end strings in the source string.

// ExtractContent extracts the content between the start and end strings in the source string.
//
// Signature:
//
//	func ExtractContent(s, start, end string) []string
//
// For every occurrence of start in s, the text up to the next occurrence of
// end is collected. Scanning resumes right after the matched start marker
// (not after the end marker), so a start marker that appears inside an
// already extracted region produces its own entry as well:
//
//	ExtractContent("<b>x</b>aa<b>y</b>", "<b>", "</b>") // [x y]
//	ExtractContent("<b>x<b>y</b>", "<b>", "</b>")       // [x<b>y y]
//
// Scanning stops at the first start marker that has no end marker after it.
// An empty end marker matches immediately, so each start occurrence then
// contributes an empty string. An empty start marker yields no results.
//
// The returned slice is never nil; it is empty when nothing is found.
// Play: todo
func ExtractContent(s, start, end string) []string {
	// Kept as a non-nil empty slice so callers comparing against []string{}
	// keep working.
	result := []string{}

	// An empty start marker matches at position 0 without consuming any input,
	// so the scan below would never advance and would loop forever.
	if start == "" {
		return result
	}

	for {
		_, after, found := strings.Cut(s, start)
		if !found {
			break
		}

		before, _, found := strings.Cut(after, end)
		if !found {
			break
		}

		result = append(result, before)
		// Continue right after the start marker just consumed.
		s = after
	}

	return result
}
[]struct { + name string + input string + start string + end string + expected []string + }{ + { + name: "Extract content between and ", + input: "This is content1 and content2 and content3", + start: "", + end: "", + expected: []string{"content1", "content2", "content3"}, + }, + { + name: "No tags in the string", + input: "This string has no tags", + start: "", + end: "", + expected: []string{}, + }, + { + name: "Single tag pair", + input: "onlyContent", + start: "", + end: "", + expected: []string{"onlyContent"}, + }, + { + name: "Tags without end tag", + input: "This content without end tag", + start: "", + end: "", + expected: []string{}, + }, + { + name: "Tags with nested content", + input: "content inner end", + start: "", + end: "", + expected: []string{"content inner end"}, + }, + { + name: "Edge case with empty string", + input: "", + start: "", + end: "", + expected: []string{}, + }, + { + name: "Edge case with no start tag", + input: "content without start tag", + start: "", + end: "", + expected: []string{}, + }, + { + name: "Edge case with no end tag", + input: "content without end tag", + start: "", + end: "", + expected: []string{}, + }, + { + name: "Multiple consecutive tags", + input: "content1content2content3", + start: "", + end: "", + expected: []string{"content1", "content2", "content3"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := ExtractContent(tt.input, tt.start, tt.end) + assert.Equal(tt.expected, result) + }) + } +}