From 08f14d2b084bd0de772fd1ea01ba9705cad10c9e Mon Sep 17 00:00:00 2001
From: dudaodong <lanliddd.2007@163.com>
Date: Fri, 8 Nov 2024 14:11:25 +0800
Subject: [PATCH] feat: add ExtractContent

---
 docs/api/packages/strutil.md    | 31 ++++++++++++
 docs/en/api/packages/strutil.md | 34 ++++++++++++-
 strutil/string.go               | 21 +++++++++
 strutil/string_example_test.go  | 12 +++++
 strutil/string_test.go          | 84 +++++++++++++++++++++++++++++++++
 5 files changed, 181 insertions(+), 1 deletion(-)
diff --git a/docs/api/packages/strutil.md b/docs/api/packages/strutil.md
index 3e214e3c..85023fcc 100644
--- a/docs/api/packages/strutil.md
+++ b/docs/api/packages/strutil.md
@@ -68,6 +68,7 @@ import (
 -   [Rotate](#Rotate)
 -   [TemplateReplace](#TemplateReplace)
 -   [RegexMatchAllGroups](#RegexMatchAllGroups)
+-   [ExtractContent](#ExtractContent)
 
 
 <div STYLE="page-break-after: always;"></div>
@@ -1728,4 +1729,34 @@ func main() {
     // [john.doe@example.com john.doe example com]
     // [jane.doe@example.com jane.doe example com]
 }
+```
+
+### <span id="ExtractContent">ExtractContent</span>
+
+<p>提取两个标记之间的内容。</p>
+
+<b>函数签名:</b>
+
+```go
+func ExtractContent(s, start, end string) []string
+```
+
+<b>示例:<span style="float:right;display:inline-block;">[Run](todo)</span></b>
+
+```go
+import (
+    "fmt"
+    "github.com/duke-git/lancet/v2/strutil"
+)
+
+func main() {
+    html := `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`
+
+    result := strutil.ExtractContent(html, "<span>", "</span>")
+
+    fmt.Println(result)
+
+    // Output:
+    // [content1 content2 content1]
+}
 ```
\ No newline at end of file
diff --git a/docs/en/api/packages/strutil.md b/docs/en/api/packages/strutil.md
index c0154158..d8c905a9 100644
--- a/docs/en/api/packages/strutil.md
+++ b/docs/en/api/packages/strutil.md
@@ -68,6 +68,8 @@ import (
 -   [Rotate](#Rotate)
 -   [TemplateReplace](#TemplateReplace)
 -   [RegexMatchAllGroups](#RegexMatchAllGroups)
+-   [ExtractContent](#ExtractContent)
+
 
 <div STYLE="page-break-after: always;"></div>
 
@@ -1708,7 +1710,7 @@ func main() {
 func RegexMatchAllGroups(pattern, str string) [][]string
 ```
 
-<b>example:<span style="float:right;display:inline-block;">[Run](https://go.dev/play/p/JZiu0RXpgN-)</span></b>
+<b>Example:<span style="float:right;display:inline-block;">[Run](https://go.dev/play/p/JZiu0RXpgN-)</span></b>
 
 ```go
 import (
@@ -1729,4 +1731,34 @@ func main() {
     // [john.doe@example.com john.doe example com]
     // [jane.doe@example.com jane.doe example com]
 }
+```
+
+### <span id="ExtractContent">ExtractContent</span>
+
+<p>Extracts the content between the start and end strings in the source string.</p>
+
+<b>Signature:</b>
+
+```go
+func ExtractContent(s, start, end string) []string
+```
+
+<b>Example:<span style="float:right;display:inline-block;">[Run](todo)</span></b>
+
+```go
+import (
+    "fmt"
+    "github.com/duke-git/lancet/v2/strutil"
+)
+
+func main() {
+    html := `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`
+
+    result := strutil.ExtractContent(html, "<span>", "</span>")
+
+    fmt.Println(result)
+
+    // Output:
+    // [content1 content2 content1]
+}
 ```
\ No newline at end of file
diff --git a/strutil/string.go b/strutil/string.go
index 28a43c00..d927fd5c 100644
--- a/strutil/string.go
+++ b/strutil/string.go
@@ -735,3 +735,24 @@ func RegexMatchAllGroups(pattern, str string) [][]string {
 	matches := re.FindAllStringSubmatch(str, -1)
 	return matches
 }
+
+// ExtractContent extracts the content between the start and end strings in the source string.
+// Each non-overlapping start that has an end somewhere after it yields the text up to the nearest such end.
+// The result is empty, never nil, when nothing matches or when start is empty.
+// Play: todo
+func ExtractContent(s, start, end string) []string {
+	result := []string{}
+	for start != "" { // an empty start would never advance s, so the scan could not terminate
+		_, after, ok := strings.Cut(s, start)
+		if !ok {
+			break
+		}
+		before, _, ok := strings.Cut(after, end)
+		if !ok {
+			break
+		}
+		result = append(result, before)
+		s = after // resume right after this start marker, not after the end marker
+	}
+	return result
+}
diff --git a/strutil/string_example_test.go b/strutil/string_example_test.go
index 98408762..00eb7c12 100644
--- a/strutil/string_example_test.go
+++ b/strutil/string_example_test.go
@@ -753,3 +753,15 @@ func ExampleRegexMatchAllGroups() {
 	// [john.doe@example.com john.doe example com]
 	// [jane.doe@example.com jane.doe example com]
 }
+
+func ExampleExtractContent() {
+	const src = `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`
+
+	// Collect the text enclosed by each <span>...</span> pair, in order of appearance.
+	spans := ExtractContent(src, "<span>", "</span>")
+	fmt.Println(spans)
+
+	// Output:
+	// [content1 content2 content1]
+
+}
diff --git a/strutil/string_test.go b/strutil/string_test.go
index e7e068bc..79bd14ca 100644
--- a/strutil/string_test.go
+++ b/strutil/string_test.go
@@ -853,3 +853,87 @@ func TestRegexMatchAllGroups(t *testing.T) {
 		assert.Equal(tt.expected, result)
 	}
 }
+
+// TestExtractContent runs ExtractContent over a table of inputs that all use the <tag>/</tag> markers.
+func TestExtractContent(t *testing.T) {
+	t.Parallel()
+	assert := internal.NewAssert(t, "TestExtractContent")
+	cases := []struct {
+		desc  string
+		src   string
+		start string
+		end   string
+		want  []string
+	}{
+		{
+			desc:  "Extract content between <tag> and </tag>",
+			src:   "This is <tag>content1</tag> and <tag>content2</tag> and <tag>content3</tag>",
+			start: "<tag>",
+			end:   "</tag>",
+			want:  []string{"content1", "content2", "content3"},
+		},
+		{
+			desc:  "No tags in the string",
+			src:   "This string has no tags",
+			start: "<tag>",
+			end:   "</tag>",
+			want:  []string{},
+		},
+		{
+			desc:  "Single tag pair",
+			src:   "<tag>onlyContent</tag>",
+			start: "<tag>",
+			end:   "</tag>",
+			want:  []string{"onlyContent"},
+		},
+		{
+			desc:  "Tags without end tag",
+			src:   "This <tag>content without end tag",
+			start: "<tag>",
+			end:   "</tag>",
+			want:  []string{},
+		},
+		{
+			desc:  "Tags with nested content",
+			src:   "<tag>content <nested>inner</nested> end</tag>",
+			start: "<tag>",
+			end:   "</tag>",
+			want:  []string{"content <nested>inner</nested> end"},
+		},
+		{
+			desc:  "Edge case with empty string",
+			src:   "",
+			start: "<tag>",
+			end:   "</tag>",
+			want:  []string{},
+		},
+		{
+			desc:  "Edge case with no start tag",
+			src:   "content without start tag",
+			start: "<tag>",
+			end:   "</tag>",
+			want:  []string{},
+		},
+		{
+			desc:  "Edge case with no end tag",
+			src:   "<tag>content without end tag",
+			start: "<tag>",
+			end:   "</tag>",
+			want:  []string{},
+		},
+		{
+			desc:  "Multiple consecutive tags",
+			src:   "<tag>content1</tag><tag>content2</tag><tag>content3</tag>",
+			start: "<tag>",
+			end:   "</tag>",
+			want:  []string{"content1", "content2", "content3"},
+		},
+	}
+	// Every table entry runs as its own subtest, named after the entry's description.
+	for _, tc := range cases {
+		t.Run(tc.desc, func(t *testing.T) {
+			got := ExtractContent(tc.src, tc.start, tc.end)
+			assert.Equal(tc.want, got)
+		})
+	}
+}