Skip to content

Commit

Permalink
feat: add ExtractContent
Browse files Browse the repository at this point in the history
:
  • Loading branch information
duke-git committed Nov 8, 2024
1 parent 0ed2b11 commit 08f14d2
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 1 deletion.
31 changes: 31 additions & 0 deletions docs/api/packages/strutil.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ import (
- [Rotate](#Rotate)
- [TemplateReplace](#TemplateReplace)
- [RegexMatchAllGroups](#RegexMatchAllGroups)
- [ExtractContent](#ExtractContent)


<div STYLE="page-break-after: always;"></div>
Expand Down Expand Up @@ -1728,4 +1729,34 @@ func main() {
// [[email protected] john.doe example com]
// [[email protected] jane.doe example com]
}
```

### <span id="ExtractContent">ExtractContent</span>

<p>提取两个标记之间的内容。</p>

<b>函数签名:</b>

```go
func ExtractContent(s, start, end string) []string
```

<b>示例:<span style="float:right;display:inline-block;">[Run](todo)</span></b>

```go
import (
"fmt"
"github.com/duke-git/lancet/v2/strutil"
)

func main() {
    // Three <span> elements separated by filler text.
    const src = `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`

    fmt.Println(strutil.ExtractContent(src, "<span>", "</span>"))

    // Output:
    // [content1 content2 content1]
}
```
34 changes: 33 additions & 1 deletion docs/en/api/packages/strutil.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ import (
- [Rotate](#Rotate)
- [TemplateReplace](#TemplateReplace)
- [RegexMatchAllGroups](#RegexMatchAllGroups)
- [ExtractContent](#ExtractContent)


<div STYLE="page-break-after: always;"></div>

Expand Down Expand Up @@ -1708,7 +1710,7 @@ func main() {
func RegexMatchAllGroups(pattern, str string) [][]string
```

<b>example:<span style="float:right;display:inline-block;">[Run](https://go.dev/play/p/JZiu0RXpgN-)</span></b>
<b>Example:<span style="float:right;display:inline-block;">[Run](https://go.dev/play/p/JZiu0RXpgN-)</span></b>

```go
import (
Expand All @@ -1729,4 +1731,34 @@ func main() {
// [[email protected] john.doe example com]
// [[email protected] jane.doe example com]
}
```

### <span id="ExtractContent">ExtractContent</span>

<p>Extracts the content between the start and end strings in the source string.</p>

<b>Signature:</b>

```go
func ExtractContent(s, start, end string) []string
```

<b>Example:<span style="float:right;display:inline-block;">[Run](todo)</span></b>

```go
import (
"fmt"
"github.com/duke-git/lancet/v2/strutil"
)

func main() {
    // Three <span> elements separated by filler text.
    const src = `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`

    fmt.Println(strutil.ExtractContent(src, "<span>", "</span>"))

    // Output:
    // [content1 content2 content1]
}
```
21 changes: 21 additions & 0 deletions strutil/string.go
Original file line number Diff line number Diff line change
Expand Up @@ -735,3 +735,24 @@ func RegexMatchAllGroups(pattern, str string) [][]string {
matches := re.FindAllStringSubmatch(str, -1)
return matches
}

// ExtractContent extracts the content between the start and end strings in the source string.
//
// s is scanned from left to right. Each match begins right after the first
// occurrence of start and stops right before the first occurrence of end that
// follows it. Scanning then resumes after that end marker, so the returned
// substrings never overlap and a consumed end marker is never reused as a
// start marker (this matters when start == end, e.g. quote characters).
//
// The returned slice is never nil. It is empty when start is empty, when s
// does not contain start, or when no end follows a start. An empty end matches
// immediately after every start, producing one empty string per occurrence of
// start.
// Play: todo
func ExtractContent(s, start, end string) []string {
	// Keep a non-nil empty slice so callers comparing against []string{} still match.
	result := []string{}

	// strings.Cut with an empty separator succeeds without consuming any
	// input, so an empty start marker would make the loop below spin forever.
	if start == "" {
		return result
	}

	for {
		_, after, found := strings.Cut(s, start)
		if !found {
			break
		}

		content, rest, found := strings.Cut(after, end)
		if !found {
			break
		}

		result = append(result, content)
		// Continue after the end marker that was just consumed so matches do not overlap.
		s = rest
	}

	return result
}
12 changes: 12 additions & 0 deletions strutil/string_example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -753,3 +753,15 @@ func ExampleRegexMatchAllGroups() {
// [[email protected] john.doe example com]
// [[email protected] jane.doe example com]
}

// ExampleExtractContent prints the text found inside each <span> element of a small HTML snippet.
func ExampleExtractContent() {
	const src = `<span>content1</span>aa<span>content2</span>bb<span>content1</span>`

	fmt.Println(ExtractContent(src, "<span>", "</span>"))

	// Output:
	// [content1 content2 content1]
}
84 changes: 84 additions & 0 deletions strutil/string_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -853,3 +853,87 @@ func TestRegexMatchAllGroups(t *testing.T) {
assert.Equal(tt.expected, result)
}
}

// TestExtractContent runs ExtractContent over a table of inputs and compares
// each result with the expected slice, one subtest per case.
func TestExtractContent(t *testing.T) {
	t.Parallel()
	assert := internal.NewAssert(t, "TestExtractContent")

	// Every case below uses the same pair of delimiters.
	const (
		openTag  = "<tag>"
		closeTag = "</tag>"
	)

	type testCase struct {
		name     string
		input    string
		start    string
		end      string
		expected []string
	}

	cases := []testCase{
		{
			name:     "Extract content between <tag> and </tag>",
			input:    "This is <tag>content1</tag> and <tag>content2</tag> and <tag>content3</tag>",
			start:    openTag,
			end:      closeTag,
			expected: []string{"content1", "content2", "content3"},
		},
		{
			name:     "No tags in the string",
			input:    "This string has no tags",
			start:    openTag,
			end:      closeTag,
			expected: []string{},
		},
		{
			name:     "Single tag pair",
			input:    "<tag>onlyContent</tag>",
			start:    openTag,
			end:      closeTag,
			expected: []string{"onlyContent"},
		},
		{
			name:     "Tags without end tag",
			input:    "This <tag>content without end tag",
			start:    openTag,
			end:      closeTag,
			expected: []string{},
		},
		{
			name:     "Tags with nested content",
			input:    "<tag>content <nested>inner</nested> end</tag>",
			start:    openTag,
			end:      closeTag,
			expected: []string{"content <nested>inner</nested> end"},
		},
		{
			name:     "Edge case with empty string",
			input:    "",
			start:    openTag,
			end:      closeTag,
			expected: []string{},
		},
		{
			name:     "Edge case with no start tag",
			input:    "content without start tag",
			start:    openTag,
			end:      closeTag,
			expected: []string{},
		},
		{
			name:     "Edge case with no end tag",
			input:    "<tag>content without end tag",
			start:    openTag,
			end:      closeTag,
			expected: []string{},
		},
		{
			name:     "Multiple consecutive tags",
			input:    "<tag>content1</tag><tag>content2</tag><tag>content3</tag>",
			start:    openTag,
			end:      closeTag,
			expected: []string{"content1", "content2", "content3"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			assert.Equal(tc.expected, ExtractContent(tc.input, tc.start, tc.end))
		})
	}
}

0 comments on commit 08f14d2

Please sign in to comment.