From 119de5e33b9e45ab6c318f458119a3cf8d9234c1 Mon Sep 17 00:00:00 2001 From: "daniel_fillol@hotmail.com" <55287657+DanielFillol@users.noreply.github.com> Date: Fri, 31 May 2024 23:36:00 -0300 Subject: [PATCH 1/2] feat: after testing decide to remove a few functions, write some new and modified all with better description and examples. --- goSpider.go | 615 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 399 insertions(+), 216 deletions(-) diff --git a/goSpider.go b/goSpider.go index 8468b4c..b2597da 100644 --- a/goSpider.go +++ b/goSpider.go @@ -6,19 +6,24 @@ import ( "github.com/chromedp/cdproto/cdp" "github.com/chromedp/cdproto/page" "github.com/chromedp/chromedp" + "io/ioutil" "log" "os" + "strings" "time" ) -// Navigator is a struct that holds the context for the ChromeDP session and a logger +// Navigator is a struct that holds the context for the ChromeDP session and a logger. type Navigator struct { Ctx context.Context Cancel context.CancelFunc Logger *log.Logger } -// NewNavigator creates a new Navigator instance +// NewNavigator creates a new Navigator instance. +// Example: +// +// nav := goSpider.NewNavigator() func NewNavigator() *Navigator { ctx, cancel := chromedp.NewContext(context.Background()) logger := log.New(os.Stdout, "goSpider: ", log.LstdFlags) @@ -29,330 +34,508 @@ func NewNavigator() *Navigator { } } -// FetchHTML fetches the HTML content of a given URL -func (nav *Navigator) FetchHTML(url string) (string, error) { - nav.Logger.Printf("Fetching HTML content from URL: %s\n", url) - var htmlContent string - err := chromedp.Run(nav.Ctx, +// OpenNewTab opens a new browser tab with the specified URL. 
+// Example: +// +// err := nav.OpenNewTab("https://www.example.com") +func (nav *Navigator) OpenNewTab(url string) error { + nav.Logger.Printf("Opening new tab with URL: %s\n", url) + ctx, cancel := chromedp.NewContext(nav.Ctx) + defer cancel() + err := chromedp.Run(ctx, chromedp.Navigate(url), - chromedp.OuterHTML("html", &htmlContent), ) if err != nil { - nav.Logger.Printf("Failed to fetch URL: %v\n", err) - return "", fmt.Errorf("failed to fetch URL: %v", err) + nav.Logger.Printf("Failed to open new tab: %v\n", err) + return fmt.Errorf("failed to open new tab: %v", err) } - nav.Logger.Println("HTML content fetched successfully") - return htmlContent, nil + // nav.Logger.Println("New tab opened successfully") + return nil } -// ClickButton clicks a button identified by the given selector -func (nav *Navigator) ClickButton(selector string) error { - nav.Logger.Printf("Clicking button with selector: %s\n", selector) +// OpenURL opens the specified URL in the current browser context. +// Example: +// +// err := nav.OpenURL("https://www.example.com") +func (nav *Navigator) OpenURL(url string) error { + nav.Logger.Printf("Opening URL: %s\n", url) err := chromedp.Run(nav.Ctx, - chromedp.Click(selector, chromedp.NodeVisible), + chromedp.Navigate(url), + chromedp.WaitReady("body"), // Ensures the page is fully loaded ) if err != nil { - nav.Logger.Printf("Failed to click button: %v\n", err) - return fmt.Errorf("failed to click button: %v", err) + nav.Logger.Printf("Failed to open URL: %v\n", err) + return fmt.Errorf("failed to open URL: %v", err) } - nav.Logger.Println("Button clicked successfully") + // nav.Logger.Println("URL opened successfully") return nil } -// FillSearchBar fills a search bar identified by the given selector and submits the form -func (nav *Navigator) FillSearchBar(selector, query string) error { - nav.Logger.Printf("Filling search bar with selector: %s and query: %s\n", selector, query) +// GetCurrentURL returns the current URL of the browser. 
+// Example: +// +// currentURL, err := nav.GetCurrentURL() +func (nav *Navigator) GetCurrentURL() (string, error) { + nav.Logger.Println("Extracting the current URL") + var currentURL string err := chromedp.Run(nav.Ctx, - chromedp.SetValue(selector, query, chromedp.NodeVisible), - chromedp.EvaluateAsDevTools(fmt.Sprintf(`document.querySelector('%s').closest('form').submit()`, selector), nil), + chromedp.Location(&currentURL), ) if err != nil { - nav.Logger.Printf("Failed to fill search bar: %v\n", err) - return fmt.Errorf("failed to fill search bar: %v", err) + nav.Logger.Printf("Failed to extract current URL: %v\n", err) + return "", fmt.Errorf("failed to extract current URL: %v", err) } - - nav.Logger.Println("Search bar filled and form submitted successfully") - return nil + // nav.Logger.Println("Current URL extracted successfully") + return currentURL, nil } -// OpenNewTab opens a new tab with the given URL -func (nav *Navigator) OpenNewTab(url string) error { - nav.Logger.Printf("Opening new tab with URL: %s\n", url) - ctx, cancel := chromedp.NewContext(nav.Ctx) - defer cancel() - err := chromedp.Run(ctx, +// Login logs into a website using the provided credentials and selectors. 
+// Example: +// +// err := nav.Login("https://www.example.com/login", "username", "password", "#username", "#password", "#login-button", "Login failed") +func (nav *Navigator) Login(url, username, password, usernameSelector, passwordSelector, loginButtonSelector string, messageFailedSuccess string) error { + nav.Logger.Printf("Logging into URL: %s\n", url) + err := chromedp.Run(nav.Ctx, chromedp.Navigate(url), + chromedp.WaitVisible(usernameSelector, chromedp.ByQuery), + chromedp.SendKeys(usernameSelector, username, chromedp.ByQuery), + chromedp.WaitVisible(passwordSelector, chromedp.ByQuery), + chromedp.SendKeys(passwordSelector, password, chromedp.ByQuery), + chromedp.WaitVisible(loginButtonSelector, chromedp.ByQuery), + chromedp.Click(loginButtonSelector, chromedp.ByQuery), + chromedp.WaitReady("body"), // Wait for the next page to load ) if err != nil { - nav.Logger.Printf("Failed to open new tab: %v\n", err) - return fmt.Errorf("failed to open new tab: %v", err) + if messageFailedSuccess != "" { + message, err := nav.GetElement(messageFailedSuccess) + if err == nil { + nav.Logger.Printf("Message found: %s", message) + } else { + nav.Logger.Printf("Message was not found") + } + } + + nav.Logger.Printf("Failed to log in: %v\n", err) + return fmt.Errorf("failed to log in: %v", err) } - nav.Logger.Println("New tab opened successfully") + // nav.Logger.Println("Logged in successfully") return nil } -// ExtractLinks extracts all the links from the current page -func (nav *Navigator) ExtractLinks() ([]string, error) { - nav.Logger.Println("Extracting links from the current page") - var links []string +// CaptureScreenshot captures a screenshot of the current browser window. 
+// Example: +// +// err := nav.CaptureScreenshot() +func (nav *Navigator) CaptureScreenshot() error { + var buf []byte + // nav.Logger.Println("Capturing screenshot") err := chromedp.Run(nav.Ctx, - chromedp.Evaluate(`Array.from(document.querySelectorAll('a')).map(a => a.href)`, &links), + chromedp.CaptureScreenshot(&buf), ) if err != nil { - nav.Logger.Printf("Failed to extract links: %v\n", err) - return nil, fmt.Errorf("failed to extract links: %v", err) + nav.Logger.Printf("Failed to capture screenshot: %v\n", err) + return fmt.Errorf("failed to capture screenshot: %v", err) } - nav.Logger.Println("Links extracted successfully") - return links, nil -} - -// ExtractText extracts all the text content from the current page -func (nav *Navigator) ExtractText() (string, error) { - nav.Logger.Println("Extracting text content from the current page") - var text string - err := chromedp.Run(nav.Ctx, - chromedp.OuterHTML("body", &text, chromedp.NodeVisible), - ) + err = ioutil.WriteFile("screenshot.png", buf, 0644) if err != nil { - nav.Logger.Printf("Failed to extract text: %v\n", err) - return "", fmt.Errorf("failed to extract text: %v", err) + nav.Logger.Printf("Failed to save screenshot: %v\n", err) + return fmt.Errorf("failed to save screenshot: %v", err) } - nav.Logger.Println("Text content extracted successfully") - return text, nil + nav.Logger.Println("Screenshot saved successfully") + return nil } -// FillForm fills a form with the provided data -func (nav *Navigator) FillForm(formSelector string, data map[string]string) error { - nav.Logger.Printf("Filling form with selector: %s and data: %v\n", formSelector, data) - tasks := []chromedp.Action{ - chromedp.WaitVisible(formSelector), - } - for field, value := range data { - tasks = append(tasks, chromedp.SetValue(fmt.Sprintf("%s [name=%s]", formSelector, field), value)) - } - tasks = append(tasks, chromedp.Submit(formSelector)) +// GetElement retrieves the text content of an element specified by the selector. 
+// Example: +// +// text, err := nav.GetElement("#elementID") +func (nav *Navigator) GetElement(selector string) (string, error) { + var content string - err := chromedp.Run(nav.Ctx, tasks...) + err := nav.WaitForElement(selector, 3*time.Second) if err != nil { - nav.Logger.Printf("Failed to fill form: %v\n", err) - return fmt.Errorf("failed to fill form: %v", err) + nav.Logger.Printf("Failed waiting for element: %v\n", err) + return "", fmt.Errorf("failed waiting for element: %v", err) } - nav.Logger.Println("Form filled and submitted successfully") - return nil -} -// HandleAlert handles a JavaScript alert by accepting it -func (nav *Navigator) HandleAlert() error { - nav.Logger.Println("Handling JavaScript alert by accepting it") + err = chromedp.Run(nav.Ctx, + chromedp.Text(selector, &content, chromedp.ByQuery, chromedp.NodeVisible), + ) + if err != nil && err.Error() != "could not find node" { + nav.Logger.Printf("Failed to get element: %v\n", err) + return "", fmt.Errorf("failed to get element: %v", err) + } + if content == "" { + return "", nil // Element not found or empty + } + return content, nil +} - listenCtx, cancel := context.WithCancel(nav.Ctx) +// WaitForElement waits for an element specified by the selector to be visible within the given timeout. 
+// Example: +// +// err := nav.WaitForElement("#elementID", 5*time.Second) +func (nav *Navigator) WaitForElement(selector string, timeout time.Duration) error { + nav.Logger.Printf("Waiting for element with selector: %s to be visible\n", selector) + ctx, cancel := context.WithTimeout(nav.Ctx, timeout) defer cancel() + _ = chromedp.Run(ctx, + chromedp.WaitVisible(selector), + ) + // if err != nil { + // nav.Logger.Printf("Failed to wait for element: %v\n", err) + // return fmt.Errorf("failed to wait for element: %v", err) + // } + // nav.Logger.Println("Element is now visible") + return nil +} - chromedp.ListenTarget(listenCtx, func(ev interface{}) { - switch ev := ev.(type) { - case *page.EventJavascriptDialogOpening: - nav.Logger.Printf("Alert detected: %s", ev.Message) - err := chromedp.Run(nav.Ctx, - page.HandleJavaScriptDialog(true), - ) - if err != nil { - nav.Logger.Printf("Failed to handle alert: %v\n", err) - } - } - }) +// ClickButton clicks a button specified by the selector. 
+// Example: +// +// err := nav.ClickButton("#buttonID") +func (nav *Navigator) ClickButton(selector string) error { + nav.Logger.Printf("Clicking button with selector: %s\n", selector) - // Run a no-op to wait for the dialog to be handled - err := chromedp.Run(nav.Ctx, chromedp.Sleep(2*time.Second)) + err := nav.WaitForElement(selector, 3*time.Second) if err != nil { - nav.Logger.Printf("Failed to handle alert: %v\n", err) - return fmt.Errorf("failed to handle alert: %v", err) + nav.Logger.Printf("Failed waiting for element: %v\n", err) + return fmt.Errorf("failed waiting for element: %v", err) } - nav.Logger.Println("JavaScript alert accepted successfully") - return nil -} - -// SelectDropdown selects an option from a dropdown menu identified by the selector and option value -func (nav *Navigator) SelectDropdown(selector, value string) error { - nav.Logger.Printf("Selecting dropdown option with selector: %s and value: %s\n", selector, value) - err := chromedp.Run(nav.Ctx, - chromedp.SetValue(selector, value, chromedp.NodeVisible), + err = chromedp.Run(nav.Ctx, + chromedp.Click(selector, chromedp.NodeVisible), ) if err != nil { - nav.Logger.Printf("Failed to select dropdown option: %v\n", err) - return fmt.Errorf("failed to select dropdown option: %v", err) + nav.Logger.Printf("Failed to click button: %v\n", err) + return fmt.Errorf("failed to click button: %v", err) } - nav.Logger.Println("Dropdown option selected successfully") + // nav.Logger.Println("Button clicked successfully") + chromedp.WaitReady("body") return nil } -// CheckCheckbox checks a checkbox identified by the selector -func (nav *Navigator) CheckCheckbox(selector string) error { - nav.Logger.Printf("Checking checkbox with selector: %s\n", selector) +// ClickElement clicks an element specified by the selector. 
+// Example: +// +// err := nav.ClickElement("#elementID") +func (nav *Navigator) ClickElement(selector string) error { + nav.Logger.Printf("Clicking element with selector: %s\n", selector) err := chromedp.Run(nav.Ctx, - chromedp.SetAttributeValue(selector, "checked", "true", chromedp.NodeVisible), + chromedp.Click(selector, chromedp.ByID), ) if err != nil { - nav.Logger.Printf("Failed to check checkbox: %v\n", err) - return fmt.Errorf("failed to check checkbox: %v", err) + log.Printf("chromedp error: %v", err) } - nav.Logger.Println("Checkbox checked successfully") + return nil } -// UncheckCheckbox unchecks a checkbox identified by the selector -func (nav *Navigator) UncheckCheckbox(selector string) error { - nav.Logger.Printf("Unchecking checkbox with selector: %s\n", selector) - err := chromedp.Run(nav.Ctx, - chromedp.RemoveAttribute(selector, "checked", chromedp.NodeVisible), - ) +// CheckRadioButton selects a radio button specified by the selector. +// Example: +// +// err := nav.CheckRadioButton("#radioButtonID") +func (nav *Navigator) CheckRadioButton(selector string) error { + nav.Logger.Printf("Selecting radio button with selector: %s\n", selector) + + err := nav.WaitForElement(selector, 3*time.Second) if err != nil { - nav.Logger.Printf("Failed to uncheck checkbox: %v\n", err) - return fmt.Errorf("failed to uncheck checkbox: %v", err) + nav.Logger.Printf("Failed waiting for element: %v\n", err) + return fmt.Errorf("failed waiting for element: %v", err) } - nav.Logger.Println("Checkbox unchecked successfully") - return nil -} -// SelectRadioButton selects a radio button identified by the selector -func (nav *Navigator) SelectRadioButton(selector string) error { - nav.Logger.Printf("Selecting radio button with selector: %s\n", selector) - err := chromedp.Run(nav.Ctx, + err = chromedp.Run(nav.Ctx, chromedp.Click(selector, chromedp.NodeVisible), ) if err != nil { nav.Logger.Printf("Failed to select radio button: %v\n", err) return fmt.Errorf("failed to 
select radio button: %v", err) } - nav.Logger.Println("Radio button selected successfully") + // nav.Logger.Println("Radio button selected successfully") return nil } -// UploadFile uploads a file to a file input identified by the selector -func (nav *Navigator) UploadFile(selector, filePath string) error { - nav.Logger.Printf("Uploading file with selector: %s and file path: %s\n", selector, filePath) - err := chromedp.Run(nav.Ctx, - chromedp.SetUploadFiles(selector, []string{filePath}), +// UncheckRadioButton unchecks a checkbox specified by the selector. +// Example: +// +// err := nav.UncheckRadioButton("#checkboxID") +func (nav *Navigator) UncheckRadioButton(selector string) error { + nav.Logger.Printf("Unchecking checkbox with selector: %s\n", selector) + + err := nav.WaitForElement(selector, 3*time.Second) + if err != nil { + nav.Logger.Printf("Failed waiting for element: %v\n", err) + return fmt.Errorf("failed waiting for element: %v", err) + } + + err = chromedp.Run(nav.Ctx, + chromedp.RemoveAttribute(selector, "checked", chromedp.NodeVisible), ) if err != nil { - nav.Logger.Printf("Failed to upload file: %v\n", err) - return fmt.Errorf("failed to upload file: %v", err) + nav.Logger.Printf("Failed to uncheck radio button: %v\n", err) + return fmt.Errorf("failed to uncheck radio button: %v", err) } - nav.Logger.Println("File uploaded successfully") + // nav.Logger.Println("Checkbox unchecked successfully") return nil } -// WaitForElement waits for an element identified by the selector to be visible -func (nav *Navigator) WaitForElement(selector string, timeout time.Duration) error { - nav.Logger.Printf("Waiting for element with selector: %s to be visible\n", selector) - ctx, cancel := context.WithTimeout(nav.Ctx, timeout) - defer cancel() - err := chromedp.Run(ctx, - chromedp.WaitVisible(selector), +// FillField fills a field specified by the selector with the provided value. 
+// Example: +// +// err := nav.FillField("#fieldID", "value") +func (nav *Navigator) FillField(selector string, value string) error { + nav.Logger.Printf("Filling field with selector: %s\n", selector) + err := nav.WaitForElement(selector, 3*time.Second) + if err != nil { + nav.Logger.Printf("Failed waiting for element: %v\n", err) + return fmt.Errorf("failed waiting for element: %v", err) + } + + err = chromedp.Run(nav.Ctx, + chromedp.SendKeys(selector, value, chromedp.ByQuery), ) if err != nil { - nav.Logger.Printf("Failed to wait for element: %v\n", err) - return fmt.Errorf("failed to wait for element: %v", err) + nav.Logger.Printf("Failed to fill field with selector: %v\n", err) + return fmt.Errorf("failed to fill field with selector: %v", err) } - nav.Logger.Println("Element is now visible") return nil } -// WaitForAJAX waits for AJAX requests to complete by monitoring the network activity -func (nav *Navigator) WaitForAJAX(timeout time.Duration) error { - nav.Logger.Println("Waiting for AJAX requests to complete") - ctx, cancel := context.WithTimeout(nav.Ctx, timeout) - defer cancel() - err := chromedp.Run(ctx, - chromedp.Sleep(timeout), +// ExtractTableData extracts data from a table specified by the selector. 
+// Example: +// +// tableData, err := nav.ExtractTableData("#tableID") +func (nav *Navigator) ExtractTableData(selector string) ([]map[int]map[string]interface{}, error) { + nav.Logger.Printf("Extracting table data with selector: %s\n", selector) + var rows []*cdp.Node + err := chromedp.Run(nav.Ctx, + chromedp.Nodes(selector+" tr", &rows, chromedp.ByQueryAll), ) if err != nil { - nav.Logger.Printf("Failed to wait for AJAX requests: %v\n", err) - return fmt.Errorf("failed to wait for AJAX requests: %v", err) + nav.Logger.Printf("Failed to extract table rows: %v\n", err) + return nil, fmt.Errorf("failed to extract table rows: %v", err) } - nav.Logger.Println("AJAX requests completed") - return nil + + var tableData []map[int]map[string]interface{} + for _, row := range rows { + // nav.Logger.Printf("Processing row %d", rowIndex) + var cells []*cdp.Node + err = chromedp.Run(nav.Ctx, + chromedp.Nodes("td, th", &cells, chromedp.ByQueryAll, chromedp.FromNode(row)), + ) + if err != nil { + nav.Logger.Printf("Failed to extract table cells: %v\n", err) + return nil, fmt.Errorf("failed to extract table cells: %v", err) + } + + rowData := make(map[int]map[string]interface{}) + for cellIndex, cell := range cells { + // nav.Logger.Printf("Processing cell %d in row %d", cellIndex, rowIndex) + cellData := make(map[string]interface{}) + + var cellText string + err = chromedp.Run(nav.Ctx, + chromedp.Text(cell.FullXPath(), &cellText, chromedp.NodeVisible), + ) + if err != nil { + nav.Logger.Printf("Failed to get cell text: %v\n", err) + return nil, fmt.Errorf("failed to get cell text: %v", err) + } + cellData["text"] = cellText + + // Check for any nested spans within the cell + var nestedSpans []*cdp.Node + nestedSpansErr := chromedp.Run(nav.Ctx, + chromedp.Nodes(cell.FullXPath()+"//span", &nestedSpans, chromedp.ByQueryAll), + ) + if nestedSpansErr != nil { + // nav.Logger.Printf("No nested spans found in cell %d of row %d: %v\n", cellIndex, rowIndex, nestedSpansErr) + // No 
nested spans found, continue processing + nestedSpans = []*cdp.Node{} + } + + spanData := make(map[int]string) + for spanIndex, span := range nestedSpans { + // nav.Logger.Printf("Processing span %d in cell %d of row %d", spanIndex, cellIndex, rowIndex) + var spanText string + err = chromedp.Run(nav.Ctx, + chromedp.Text(span.FullXPath(), &spanText, chromedp.NodeVisible), + ) + if err != nil { + nav.Logger.Printf("Failed to get span text: %v\n", err) + return nil, fmt.Errorf("failed to get span text: %v", err) + } + spanData[spanIndex] = spanText + } + + if len(spanData) > 0 { + cellData["spans"] = spanData + } + + rowData[cellIndex] = cellData + } + tableData = append(tableData, rowData) + } + // nav.Logger.Println("Table data extracted successfully") + return tableData, nil +} + +// ExtractDivText extracts text content from divs specified by the parent selectors. +// Example: +// +// textData, err := nav.ExtractDivText("#parent1", "#parent2") +func (nav *Navigator) ExtractDivText(parentSelectors ...string) (map[string]string, error) { + nav.Logger.Println("Extracting text from divs") + data := make(map[string]string) + for _, parentSelector := range parentSelectors { + var nodes []*cdp.Node + err := chromedp.Run(nav.Ctx, + chromedp.Nodes(parentSelector+" span, "+parentSelector+" div", &nodes, chromedp.ByQueryAll), + ) + if err != nil { + nav.Logger.Printf("Failed to extract nodes from %s: %v\n", parentSelector, err) + return nil, fmt.Errorf("failed to extract nodes from %s: %v", parentSelector, err) + } + for _, node := range nodes { + if node.NodeType == cdp.NodeTypeText { + continue + } + var text string + err = chromedp.Run(nav.Ctx, + chromedp.TextContent(node.FullXPath(), &text), + ) + if err != nil { + nav.Logger.Printf("Failed to extract text content from %s: %v\n", node.FullXPath(), err) + return nil, fmt.Errorf("failed to extract text content from %s: %v", node.FullXPath(), err) + } + data[node.AttributeValue("id")] = strings.TrimSpace(text) + } + } + // 
nav.Logger.Println("Text extracted successfully from divs") + return data, nil } -// Close closes the Navigator instance +// Close closes the Navigator instance and releases resources. +// Example: +// +// nav.Close() func (nav *Navigator) Close() { - nav.Logger.Println("Closing the Navigator instance") + // nav.Logger.Println("Closing the Navigator instance") nav.Cancel() nav.Logger.Println("Navigator instance closed successfully") } -// GetCurrentURL extracts the current URL from the browser -// Returns the current URL as a string and an error if any -func (nav *Navigator) GetCurrentURL() (string, error) { - nav.Logger.Println("Extracting the current URL") - var currentURL string +// FetchHTML fetches the HTML content of the specified URL. +// Example: +// +// htmlContent, err := nav.FetchHTML("https://www.example.com") +func (nav *Navigator) FetchHTML(url string) (string, error) { + nav.Logger.Printf("Fetching HTML content from URL: %s\n", url) + var htmlContent string err := chromedp.Run(nav.Ctx, - chromedp.Location(&currentURL), + chromedp.Navigate(url), + chromedp.OuterHTML("html", &htmlContent), ) if err != nil { - nav.Logger.Printf("Failed to extract current URL: %v\n", err) - return "", fmt.Errorf("failed to extract current URL: %v", err) + nav.Logger.Printf("Failed to fetch URL: %v\n", err) + return "", fmt.Errorf("failed to fetch URL: %v", err) } - nav.Logger.Println("Current URL extracted successfully") - return currentURL, nil + nav.Logger.Println("HTML content fetched successfully") + return htmlContent, nil } -// FindElements finds multiple elements identified by the given selector and returns their outer HTML -// selector: the CSS selector of the elements to find -// Returns a slice of outer HTML strings for each element and an error if any -func (nav *Navigator) FindElements(selector string) ([]string, error) { - nav.Logger.Printf("Finding elements with selector: %s\n", selector) - var nodes []*cdp.Node +// ExtractLinks extracts all links from the 
current page. +// Example: +// +// links, err := nav.ExtractLinks() +func (nav *Navigator) ExtractLinks() ([]string, error) { + nav.Logger.Println("Extracting links from the current page") + var links []string err := chromedp.Run(nav.Ctx, - chromedp.Nodes(selector, &nodes, chromedp.ByQueryAll), + chromedp.Evaluate(`Array.from(document.querySelectorAll('a')).map(a => a.href)`, &links), ) if err != nil { - nav.Logger.Printf("Failed to find elements: %v\n", err) - return nil, fmt.Errorf("failed to find elements: %v", err) + nav.Logger.Printf("Failed to extract links: %v\n", err) + return nil, fmt.Errorf("failed to extract links: %v", err) } + // nav.Logger.Println("Links extracted successfully") + return links, nil +} - var outerHTMLs []string - for _, node := range nodes { - var outerHTML string - err = chromedp.Run(nav.Ctx, - chromedp.OuterHTML(fmt.Sprintf("#%s", node.AttributeValue("id")), &outerHTML, chromedp.NodeVisible), - ) - if err != nil { - nav.Logger.Printf("Failed to get outer HTML for node: %v\n", err) - return nil, fmt.Errorf("failed to get outer HTML for node: %v", err) +// FillForm fills out a form specified by the selector with the provided data and submits it. +// Example: +// +// formData := map[string]string{ +// "username": "myUsername", +// "password": "myPassword", +// } +// err := nav.FillForm("#loginForm", formData) +func (nav *Navigator) FillForm(formSelector string, data map[string]string) error { + nav.Logger.Printf("Filling form with selector: %s and data: %v\n", formSelector, data) + tasks := []chromedp.Action{ + chromedp.WaitVisible(formSelector), + } + for field, value := range data { + tasks = append(tasks, chromedp.SetValue(fmt.Sprintf("%s [name=%s]", formSelector, field), value)) + } + tasks = append(tasks, chromedp.Submit(formSelector)) + + err := chromedp.Run(nav.Ctx, tasks...) 
+ if err != nil { + nav.Logger.Printf("Failed to fill form: %v\n", err) + return fmt.Errorf("failed to fill form: %v", err) + } + // nav.Logger.Println("Form filled and submitted successfully") + return nil +} + +// HandleAlert handles JavaScript alerts by accepting them. +// Example: +// +// err := nav.HandleAlert() +func (nav *Navigator) HandleAlert() error { + nav.Logger.Println("Handling JavaScript alert by accepting it") + + listenCtx, cancel := context.WithCancel(nav.Ctx) + defer cancel() + + chromedp.ListenTarget(listenCtx, func(ev interface{}) { + switch ev := ev.(type) { + case *page.EventJavascriptDialogOpening: + nav.Logger.Printf("Alert detected: %s", ev.Message) + err := chromedp.Run(nav.Ctx, + page.HandleJavaScriptDialog(true), + ) + if err != nil { + nav.Logger.Printf("Failed to handle alert: %v\n", err) + } } - outerHTMLs = append(outerHTMLs, outerHTML) + }) + + // Run a no-op to wait for the dialog to be handled + err := chromedp.Run(nav.Ctx, chromedp.Sleep(2*time.Second)) + if err != nil { + nav.Logger.Printf("Failed to handle alert: %v\n", err) + return fmt.Errorf("failed to handle alert: %v", err) } - nav.Logger.Println("Elements found successfully") - return outerHTMLs, nil + // nav.Logger.Println("JavaScript alert accepted successfully") + return nil } -// FindElement finds multiple elements identified by the given selector and returns their outer HTML -// selector: the CSS selector of the elements to find -// Returns a slice of outer HTML strings for each element and an error if any -func (nav *Navigator) FindElement(selector string) (string, error) { - nav.Logger.Printf("Finding elements with selector: %s\n", selector) - var nodes []*cdp.Node +// SelectDropdown selects an option in a dropdown specified by the selector and value. 
+// Example: +// +// err := nav.SelectDropdown("#dropdownID", "optionValue") +func (nav *Navigator) SelectDropdown(selector, value string) error { + nav.Logger.Printf("Selecting dropdown option with selector: %s and value: %s\n", selector, value) err := chromedp.Run(nav.Ctx, - chromedp.Nodes(selector, &nodes, chromedp.ByQueryAll), + chromedp.SetValue(selector, value, chromedp.NodeVisible), ) if err != nil { - nav.Logger.Printf("Failed to find elements: %v\n", err) - return "", fmt.Errorf("failed to find elements: %v", err) - } - - var outerHTML string - for _, node := range nodes { - err = chromedp.Run(nav.Ctx, - chromedp.OuterHTML(fmt.Sprintf("#%s", node.AttributeValue("id")), &outerHTML, chromedp.NodeVisible), - ) - if err != nil { - nav.Logger.Printf("Failed to get outer HTML for node: %v\n", err) - return outerHTML, fmt.Errorf("failed to get outer HTML for node: %v", err) - } + nav.Logger.Printf("Failed to select dropdown option: %v\n", err) + return fmt.Errorf("failed to select dropdown option: %v", err) } - - nav.Logger.Println("Elements found successfully") - return outerHTML, nil + // nav.Logger.Println("Dropdown option selected successfully") + return nil } From 624690d10e7044bf05cae86ef5773591ccee5a22 Mon Sep 17 00:00:00 2001 From: "daniel_fillol@hotmail.com" <55287657+DanielFillol@users.noreply.github.com> Date: Fri, 31 May 2024 23:36:55 -0300 Subject: [PATCH 2/2] feat: remove unused test functions --- goSpider_test.go | 100 ++--------------------------------------------- 1 file changed, 3 insertions(+), 97 deletions(-) diff --git a/goSpider_test.go b/goSpider_test.go index 71437dc..9fee2c1 100644 --- a/goSpider_test.go +++ b/goSpider_test.go @@ -71,19 +71,11 @@ func TestNestedElement(t *testing.T) { } } -// TestFillSearchBar tests filling a search bar and submitting the form -func TestFillSearchBar(t *testing.T) { - err := nav.FillSearchBar("#searchBar", "test query") - if err != nil { - t.Errorf("FillSearchBar error: %v", err) - } -} - // 
TestFillFormAndHandleAlert tests filling a form and handling the resulting alert func TestFillFormAndHandleAlert(t *testing.T) { formData := map[string]string{ - "username": "testuser", - "password": "testpass", + "username": "test_user", + "password": "test_pass", } err := nav.FillForm("#loginForm", formData) if err != nil { @@ -104,35 +96,14 @@ func TestSelectDropdown(t *testing.T) { } } -// TestCheckbox tests checking and unchecking a checkbox -func TestCheckbox(t *testing.T) { - err := nav.CheckCheckbox("#checkbox") - if err != nil { - t.Errorf("CheckCheckbox error: %v", err) - } - - err = nav.UncheckCheckbox("#checkbox") - if err != nil { - t.Errorf("UncheckCheckbox error: %v", err) - } -} - // TestSelectRadioButton tests selecting a radio button func TestSelectRadioButton(t *testing.T) { - err := nav.SelectRadioButton("#radioButton") + err := nav.CheckRadioButton("#radioButton") if err != nil { t.Errorf("SelectRadioButton error: %v", err) } } -// TestUploadFile tests uploading a file -func TestUploadFile(t *testing.T) { - err := nav.UploadFile("#fileInput", "testfile.txt") - if err != nil { - t.Errorf("UploadFile error: %v", err) - } -} - // TestWaitForElement tests waiting for an element to be visible after a delay func TestWaitForElement(t *testing.T) { err := nav.WaitForElement("#delayedElement", 10*time.Second) @@ -141,14 +112,6 @@ func TestWaitForElement(t *testing.T) { } } -// TestWaitForAJAX tests waiting for AJAX requests to complete -func TestWaitForAJAX(t *testing.T) { - err := nav.WaitForAJAX(10 * time.Second) - if err != nil { - t.Errorf("WaitForAJAX error: %v", err) - } -} - // TestGetCurrentURL tests extracting the current URL from the browser func TestGetCurrentURL(t *testing.T) { // Navigate to the main page @@ -190,60 +153,3 @@ func TestGetCurrentURL(t *testing.T) { t.Errorf("Expected URL: %s, but got: %s", expectedURL, currentURL) } } - -// TestFindElement tests extracting the selected a single element from a node -func TestFindElement(t 
*testing.T) { - // Start the test server - server := StartTestServer() - defer server.Close() - - // Give the server a moment to start - time.Sleep(1 * time.Second) - - // Create a new navigator instance - nav := NewNavigator() - defer nav.Close() - - _, err := nav.FetchHTML("http://localhost:8080") - if err != nil { - t.Errorf("FetchHTML error: %v", err) - } - - // Test finding a single element - elementHTML, err := nav.FindElement("#exampleButton") - if err != nil { - t.Errorf("FindElement error: %v", err) - } - - if elementHTML == "" { - t.Error("FindElement returned empty content") - } -} - -// TestFindElements tests extracting the selected a group of elements from a node -func TestFindElements(t *testing.T) { - // Start the test server - server := StartTestServer() - defer server.Close() - - // Give the server a moment to start - time.Sleep(1 * time.Second) - - // Create a new navigator instance - nav := NewNavigator() - defer nav.Close() - - _, err := nav.FetchHTML("http://localhost:8080") - if err != nil { - t.Errorf("FetchHTML error: %v", err) - } - - // Test finding multiple elements - elementsHTML, err := nav.FindElements("#exampleButton") - if err != nil { - t.Errorf("FindElements error: %v", err) - } - if len(elementsHTML) == 0 { - t.Error("FindElements returned no elements") - } -}