-
Notifications
You must be signed in to change notification settings - Fork 0
/
osrsmusictrackscrape.py
35 lines (29 loc) · 1.13 KB
/
osrsmusictrackscrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import requests
from bs4 import BeautifulSoup
#Scrape the wikitable rows of the Old School RuneScape wiki "Music" page and
#append one "<first field> - <second field>" line per data row to a text file.
URL = "https://oldschool.runescape.wiki/w/Music#Track_list"
OUTPUT_PATH = "musictracks.txt"
#Without a timeout, requests can wait forever on an unresponsive server
REQUEST_TIMEOUT_SECONDS = 30
#Number of trailing newline-separated fragments discarded from each row's text
TRAILING_FRAGMENTS = 7
#First field of the header row, which must not be written to the output
HEADER_NAME = "Name"


def parse_track_row(row_text):
    """Return the useful fields of one table row, or None if it is skipped.

    The row text is split on newlines; the first fragment and the last
    ``TRAILING_FRAGMENTS`` fragments are dropped, then the fragment at
    index 1 is removed (presumably the blank line between the first two
    cells -- verify against the live page layout).

    Args:
        row_text: The full text content of a ``<tr>`` element.

    Returns:
        A list whose first two items are the values written to the output
        file, or ``None`` when the row is too short to hold two fields or
        is the header row (first field equal to ``HEADER_NAME``).
    """
    #Split on the newline directly: routing through "_" as a placeholder
    #would break any cell text that itself contains an underscore.
    fragments = row_text.split("\n")[1:-TRAILING_FRAGMENTS]
    #Three fragments are needed so that two remain after the removal below
    if len(fragments) < 3:
        return None
    del fragments[1]
    if fragments[0] == HEADER_NAME:
        return None
    return fragments


def main():
    """Download the page and append every parsed data row to OUTPUT_PATH.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(URL, timeout=REQUEST_TIMEOUT_SECONDS)
    #Fail loudly rather than parsing an error page as if it were the article
    response.raise_for_status()
    soup = BeautifulSoup(response.text, features="html.parser")

    #Append mode is kept from the original script, so running it again adds
    #the same lines a second time. The context manager closes the file even
    #if parsing raises part-way through.
    with open(OUTPUT_PATH, "a", encoding="utf-8") as output:
        #Only the wikitables contain the data needed; filtering by class here
        #also tolerates tables that have no class attribute at all.
        for table in soup.find_all("table", class_="wikitable"):
            for row in table.find_all("tr"):
                fields = parse_track_row(row.text)
                if fields is None:
                    continue
                output.write(fields[0] + " - " + fields[1] + "\n")
                print(fields)


if __name__ == "__main__":
    main()