Skip to content

Commit

Permalink
Merge pull request #6 from DanielFillol/daniel
Browse files Browse the repository at this point in the history
Find element and find element
  • Loading branch information
DanielFillol authored May 28, 2024
2 parents 7c973cd + e6c619f commit 6005b44
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 1 deletion.
63 changes: 62 additions & 1 deletion goSpider.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package goSpider
import (
"context"
"fmt"
"github.com/chromedp/cdproto/cdp"
"github.com/chromedp/cdproto/page"
"github.com/chromedp/chromedp"
"log"
Expand All @@ -20,7 +21,7 @@ type Navigator struct {
// NewNavigator creates a new Navigator instance
func NewNavigator() *Navigator {
ctx, cancel := chromedp.NewContext(context.Background())
logger := log.New(os.Stdout, "webnav: ", log.LstdFlags)
logger := log.New(os.Stdout, "goSpider: ", log.LstdFlags)
return &Navigator{
Ctx: ctx,
Cancel: cancel,
Expand Down Expand Up @@ -295,3 +296,63 @@ func (nav *Navigator) GetCurrentURL() (string, error) {
nav.Logger.Println("Current URL extracted successfully")
return currentURL, nil
}

// FindElements finds every element matching the given selector and returns
// the outer HTML of each one, in the order chromedp.Nodes reported them.
//
// selector: the CSS selector of the elements to find.
//
// Returns one outer HTML string per matched element, or a nil slice and an
// error if the query or any outer HTML lookup fails.
func (nav *Navigator) FindElements(selector string) ([]string, error) {
	nav.Logger.Printf("Finding elements with selector: %s\n", selector)
	var nodes []*cdp.Node
	err := chromedp.Run(nav.Ctx,
		chromedp.Nodes(selector, &nodes, chromedp.ByQueryAll),
	)
	if err != nil {
		nav.Logger.Printf("Failed to find elements: %v\n", err)
		return nil, fmt.Errorf("failed to find elements: %w", err)
	}

	outerHTMLs := make([]string, 0, len(nodes))
	for _, node := range nodes {
		var outerHTML string
		// Look the node up by its DevTools node ID instead of rebuilding a
		// "#id" selector: matched elements are not required to carry an id
		// attribute, and duplicated ids would resolve to the wrong element.
		err = chromedp.Run(nav.Ctx,
			chromedp.OuterHTML([]cdp.NodeID{node.NodeID}, &outerHTML, chromedp.ByNodeID),
		)
		if err != nil {
			nav.Logger.Printf("Failed to get outer HTML for node: %v\n", err)
			return nil, fmt.Errorf("failed to get outer HTML for node: %w", err)
		}
		outerHTMLs = append(outerHTMLs, outerHTML)
	}

	nav.Logger.Println("Elements found successfully")
	return outerHTMLs, nil
}

// FindElement finds the first element matching the given selector and returns
// its outer HTML.
//
// selector: the CSS selector of the element to find.
//
// Returns the outer HTML of the first match, or an empty string and an error
// if the element cannot be queried.
func (nav *Navigator) FindElement(selector string) (string, error) {
	nav.Logger.Printf("Finding element with selector: %s\n", selector)

	var outerHTML string
	// Query the selector directly so the element does not need an id
	// attribute and only a single (the first) match is resolved.
	err := chromedp.Run(nav.Ctx,
		chromedp.OuterHTML(selector, &outerHTML, chromedp.ByQuery),
	)
	if err != nil {
		nav.Logger.Printf("Failed to find element: %v\n", err)
		// The result is meaningless on error, so do not return partial data.
		return "", fmt.Errorf("failed to find element: %w", err)
	}

	nav.Logger.Println("Element found successfully")
	return outerHTML, nil
}
57 changes: 57 additions & 0 deletions goSpider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,3 +190,60 @@ func TestGetCurrentURL(t *testing.T) {
t.Errorf("Expected URL: %s, but got: %s", expectedURL, currentURL)
}
}

// TestFindElement checks that FindElement returns non-empty outer HTML for a
// single element after the page at http://localhost:8080 has been loaded.
func TestFindElement(t *testing.T) {
	// Start the test server
	server := StartTestServer()
	defer server.Close()

	// Give the server a moment to start
	time.Sleep(1 * time.Second)

	// Create a new navigator instance
	nav := NewNavigator()
	defer nav.Close()

	// Abort immediately if the page cannot be loaded: every later step
	// depends on it and would only report a misleading follow-up failure.
	if _, err := nav.FetchHTML("http://localhost:8080"); err != nil {
		t.Fatalf("FetchHTML error: %v", err)
	}

	// Test finding a single element
	elementHTML, err := nav.FindElement("#exampleButton")
	if err != nil {
		t.Fatalf("FindElement error: %v", err)
	}

	if elementHTML == "" {
		t.Error("FindElement returned empty content")
	}
}

// TestFindElements checks that FindElements returns at least one outer HTML
// entry after the page at http://localhost:8080 has been loaded.
func TestFindElements(t *testing.T) {
	// Start the test server
	server := StartTestServer()
	defer server.Close()

	// Give the server a moment to start
	time.Sleep(1 * time.Second)

	// Create a new navigator instance
	nav := NewNavigator()
	defer nav.Close()

	// Abort immediately if the page cannot be loaded: every later step
	// depends on it and would only report a misleading follow-up failure.
	if _, err := nav.FetchHTML("http://localhost:8080"); err != nil {
		t.Fatalf("FetchHTML error: %v", err)
	}

	// Test finding multiple elements
	elementsHTML, err := nav.FindElements("#exampleButton")
	if err != nil {
		t.Fatalf("FindElements error: %v", err)
	}
	if len(elementsHTML) == 0 {
		t.Error("FindElements returned no elements")
	}
}

0 comments on commit 6005b44

Please sign in to comment.