Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Find element and find element #6

Merged
merged 2 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 62 additions & 1 deletion goSpider.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package goSpider
import (
"context"
"fmt"
"github.com/chromedp/cdproto/cdp"
"github.com/chromedp/cdproto/page"
"github.com/chromedp/chromedp"
"log"
Expand All @@ -20,7 +21,7 @@ type Navigator struct {
// NewNavigator creates a new Navigator instance
func NewNavigator() *Navigator {
ctx, cancel := chromedp.NewContext(context.Background())
logger := log.New(os.Stdout, "webnav: ", log.LstdFlags)
logger := log.New(os.Stdout, "goSpider: ", log.LstdFlags)
return &Navigator{
Ctx: ctx,
Cancel: cancel,
Expand Down Expand Up @@ -295,3 +296,63 @@ func (nav *Navigator) GetCurrentURL() (string, error) {
nav.Logger.Println("Current URL extracted successfully")
return currentURL, nil
}

// FindElements finds every element matching the given selector and returns
// the outer HTML of each one.
//
// selector: the CSS selector of the elements to find.
//
// Returns one outer HTML string per matched node, in the order the nodes were
// returned by the query, or an error if the query or any outer HTML lookup
// fails. On error the returned slice is nil.
func (nav *Navigator) FindElements(selector string) ([]string, error) {
	nav.Logger.Printf("Finding elements with selector: %s\n", selector)
	var nodes []*cdp.Node
	err := chromedp.Run(nav.Ctx,
		chromedp.Nodes(selector, &nodes, chromedp.ByQueryAll),
	)
	if err != nil {
		nav.Logger.Printf("Failed to find elements: %v\n", err)
		return nil, fmt.Errorf("failed to find elements: %w", err)
	}

	var outerHTMLs []string
	for _, node := range nodes {
		var outerHTML string
		// Address the node by its DOM node ID instead of building a
		// "#<id attribute>" selector: matched elements are not guaranteed to
		// carry an id attribute (which would yield the invalid selector "#"),
		// and ids may be duplicated or need escaping. No visibility wait is
		// applied, so hidden matches do not block the lookup.
		err = chromedp.Run(nav.Ctx,
			chromedp.OuterHTML([]cdp.NodeID{node.NodeID}, &outerHTML, chromedp.ByNodeID),
		)
		if err != nil {
			nav.Logger.Printf("Failed to get outer HTML for node: %v\n", err)
			return nil, fmt.Errorf("failed to get outer HTML for node: %w", err)
		}
		outerHTMLs = append(outerHTMLs, outerHTML)
	}

	nav.Logger.Println("Elements found successfully")
	return outerHTMLs, nil
}

// FindElement finds a single element identified by the given selector and
// returns its outer HTML.
//
// selector: the CSS selector of the element to find. When several elements
// match, the first match is used.
//
// Returns the element's outer HTML, or an empty string and an error if the
// lookup fails.
func (nav *Navigator) FindElement(selector string) (string, error) {
	nav.Logger.Printf("Finding element with selector: %s\n", selector)

	var outerHTML string
	// Query the element directly with the caller's selector. Re-querying via
	// "#<id attribute>" breaks for elements without an id attribute, and
	// iterating over every match only to keep one result is wasted work.
	err := chromedp.Run(nav.Ctx,
		chromedp.OuterHTML(selector, &outerHTML, chromedp.ByQuery),
	)
	if err != nil {
		nav.Logger.Printf("Failed to find element: %v\n", err)
		return "", fmt.Errorf("failed to find element: %w", err)
	}

	nav.Logger.Println("Element found successfully")
	return outerHTML, nil
}
57 changes: 57 additions & 0 deletions goSpider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,3 +190,60 @@ func TestGetCurrentURL(t *testing.T) {
t.Errorf("Expected URL: %s, but got: %s", expectedURL, currentURL)
}
}

// TestFindElement tests extracting the outer HTML of a single element from
// the page served by the test server.
func TestFindElement(t *testing.T) {
	// Start the test server
	server := StartTestServer()
	defer server.Close()

	// Give the server a moment to start
	time.Sleep(1 * time.Second)

	// Create a new navigator instance
	nav := NewNavigator()
	defer nav.Close()

	// The page must be loaded before any element lookup makes sense, so a
	// failure here aborts the test instead of cascading into later checks.
	if _, err := nav.FetchHTML("http://localhost:8080"); err != nil {
		t.Fatalf("FetchHTML error: %v", err)
	}

	// Test finding a single element
	elementHTML, err := nav.FindElement("#exampleButton")
	if err != nil {
		t.Fatalf("FindElement error: %v", err)
	}

	if elementHTML == "" {
		t.Error("FindElement returned empty content")
	}
}

// TestFindElements tests extracting the outer HTML of a group of elements
// from the page served by the test server.
func TestFindElements(t *testing.T) {
	// Start the test server
	server := StartTestServer()
	defer server.Close()

	// Give the server a moment to start
	time.Sleep(1 * time.Second)

	// Create a new navigator instance
	nav := NewNavigator()
	defer nav.Close()

	// The page must be loaded before any element lookup makes sense, so a
	// failure here aborts the test instead of cascading into later checks.
	if _, err := nav.FetchHTML("http://localhost:8080"); err != nil {
		t.Fatalf("FetchHTML error: %v", err)
	}

	// Test finding multiple elements
	elementsHTML, err := nav.FindElements("#exampleButton")
	if err != nil {
		t.Fatalf("FindElements error: %v", err)
	}
	if len(elementsHTML) == 0 {
		t.Error("FindElements returned no elements")
	}
}