-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlyrics.py
executable file
·29 lines (27 loc) · 1.14 KB
/
lyrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import re
import json
import requests
from bs4 import BeautifulSoup
def LyricWikia(artist, title, timeout=10):
    """Fetch the lyrics of a song through the LyricWikia API.

    The API is asked for the song first; when it reports a match, the lyrics
    page it points to is downloaded and the text of its ``lyricbox`` element
    is extracted.

    Args:
        artist: Name of the performing artist.
        title: Title of the song.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The lyrics as plain text, or the string ``"error"`` when the song is
        not found, the API answer cannot be parsed, or the lyrics page has no
        ``lyricbox`` element.

    Raises:
        requests.RequestException: If one of the HTTP requests fails or
            times out.
    """
    # Let requests build the query string: it percent-encodes every reserved
    # character ("&", "#", "?", "+", non-ASCII, ...), not only spaces.
    r = requests.get(
        'http://lyrics.wikia.com/api.php',
        params={
            'action': 'lyrics',
            'artist': artist,
            'song': title,
            'fmt': 'json',
            'func': 'getSong',
        },
        timeout=timeout,
    )

    # We got some badly formatted JSON data ("song = {'key': 'value', ...}"),
    # so drop the double quotes, turn the single quotes into double quotes
    # and strip the assignment prefix before parsing.
    payload = r.text.replace('"', "").replace("'", '"').replace("song = ", "")
    try:
        song = json.loads(payload)
    except ValueError:
        # The quote swapping above breaks on apostrophes inside values;
        # report that like any other failed lookup instead of crashing.
        return "error"

    if song.get("lyrics", "Not found") == "Not found" or "url" not in song:
        return "error"

    # Retrieve the lyrics page whose URL we just received.
    r = requests.get(song["url"], timeout=timeout)
    lyricbox = BeautifulSoup(r.text, 'html.parser').find(
        "div", {"class": "lyricbox"})
    if lyricbox is None:
        return "error"

    # Nested <div> elements are not part of the lyrics; <br> marks a line end.
    for div in lyricbox.find_all('div'):
        div.extract()
    for br in lyricbox.find_all('br'):
        br.replace_with('\n')

    # Remove each HTML comment on its own. The match must be non-greedy,
    # otherwise everything between the first "<!--" and the last "-->"
    # (including lyric text between two comments) would be deleted.
    without_comments = re.sub(
        r'<!--.*?-->', '', str(lyricbox), flags=re.DOTALL)
    lyricbox = BeautifulSoup(without_comments, 'html.parser')
    for script in lyricbox.find_all('script'):
        script.extract()
    return lyricbox.get_text()