Yahoo_Scraper_1.0.py

# Old version
from selenium import webdriver
from bs4 import BeautifulSoup
import time
from datetime import datetime, timedelta
import sys

# Set up the web driver
driver = webdriver.Edge()
# Compute the upcoming Sunday-to-Saturday date range
def get_week_dates():
    today = datetime.today()
    # Calculate the next Sunday date
    days_ahead = 6 - today.weekday()  # Number of days until next Sunday
    if days_ahead < 0:  # Wrap to the following week if the offset went negative
        days_ahead += 7
    sunday = today + timedelta(days=days_ahead)
    # Calculate the Saturday at the end of that week
    saturday = sunday + timedelta(days=6)
    return sunday.strftime('%Y-%m-%d'), saturday.strftime('%Y-%m-%d')
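
# Worked example, using the sample dates from the comments below: if today is
# Saturday 2023-12-02, today.weekday() is 5, so days_ahead = 1 and
# get_week_dates() returns ('2023-12-03', '2023-12-09'), i.e. the upcoming
# Sunday and the Saturday six days after it.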

if __name__ == "__main__":
    start_date, end_date = get_week_dates()
    # Override the start date to be one day ahead of the current date
    start_date = (datetime.today() + timedelta(days=1)).strftime('%Y-%m-%d')
    target_date = datetime.today().strftime('%Y-%m-%d')
    print("Start Date:", start_date)
    print("End Date:", end_date)        # e.g. end_date = 2023-12-09
    print("Target Date:", target_date)  # e.g. target_date = 2023-12-02

    url = f"https://finance.yahoo.com/calendar/splits?from={start_date}&to={end_date}&day={start_date}"
    # url = f"https://finance.yahoo.com/calendar/splits?from=2023-12-03&to=2023-12-09"
    driver.get(url)
    print(url)
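    # With the sample dates above (start_date = 2023-12-03, end_date = 2023-12-09),
    # the formatted URL resolves to:
    #   https://finance.yahoo.com/calendar/splits?from=2023-12-03&to=2023-12-09&day=2023-12-03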
    # Wait for the page to fully load
    driver.implicitly_wait(10)
    # Pause 2 seconds before closing the web browser window
    time.sleep(2)
    driver.quit()

    # Prompt the user to filter the tickers by their length
    filter_by_length = input("Do you want to filter by tickers that only have 3-4 characters? (yes/no): ")

    # Set up the web driver again
    driver = webdriver.Edge()
    # Get the HTML content from the link again
    driver.get(url)
    # Wait for the page to fully load again
    driver.implicitly_wait(10)
    # Get the page source
    html = driver.page_source

    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(html, "html.parser")
    # Find all the <a> elements with the data-test attribute set to "quoteLink"
    links = soup.find_all("a", {"data-test": "quoteLink"})

    # Extract the symbols/tickers and links from the <a> elements
    symbols_and_links = []
    for link in links:
        symbol = link.text
        href = 'https://finance.yahoo.com' + link["href"]
        symbols_and_links.append((symbol, href))
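
    # Note: the data-test="quoteLink" attribute reflects Yahoo Finance's markup at
    # the time this was written and may change. A possible fallback (a sketch, not
    # part of the original script) would be to match quote links by href prefix:
    #   links = soup.find_all("a", href=lambda h: h and h.startswith("/quote/"))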

    # Print the symbols/tickers and links
    print("The symbols/tickers and their links on that page are:")
    filtered_symbols_and_links = []
    for symbol, href in symbols_and_links:
        # Check if the user wants to filter by length and if the symbol has 3-4 characters
        if filter_by_length.lower() == "yes" and len(symbol) not in [3, 4]:
            continue  # Skip this symbol if it doesn't meet the criteria
        filtered_symbols_and_links.append((symbol, href))

    if not filtered_symbols_and_links:
        print("None, closing program...")  # Print "None" if there are no stocks with 3-4 characters
        sys.exit()  # Exit the program if there are no stocks with 3-4 characters

    for symbol, href in filtered_symbols_and_links:
        print(f"{symbol}: {href}")

    # Close the web driver again
    driver.quit()
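
# A rough sketch of what a run might print, using the sample dates above and a
# hypothetical ticker "ABCD" (the exact quote-link path depends on Yahoo's markup):
#   Start Date: 2023-12-03
#   End Date: 2023-12-09
#   Target Date: 2023-12-02
#   https://finance.yahoo.com/calendar/splits?from=2023-12-03&to=2023-12-09&day=2023-12-03
#   Do you want to filter by tickers that only have 3-4 characters? (yes/no): yes
#   The symbols/tickers and their links on that page are:
#   ABCD: https://finance.yahoo.com/quote/ABCD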