-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathshowlinks.py
executable file
·49 lines (43 loc) · 1.74 KB
/
showlinks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# -*- coding: utf-8 -*-
import feedparser
import html2text
import re
import time
from bs4 import BeautifulSoup
from bs4.diagnose import diagnose
# Feed whose episodes are scanned for show links.
feedUrl = 'https://techmeme.com/techmeme-ride-home-feed'
#feedUrl = 'https://rss.art19.com/coronavirus-daily-briefing'

# When True, the list following a "Sponsors" paragraph is printed as well.
# Off by default: the sponsor output was disabled in the script as written.
SHOW_SPONSORS = False

# Heading paragraphs that announce a list. Raw strings keep the regex
# backslashes from being read as (invalid) string escape sequences.
# Compiled once here instead of once per feed entry.
LINKS_HEADING = re.compile(r"Links(:*)$|Stories:$")
SPONSORS_HEADING = re.compile(r"^Sponsors(:*)(\ *)$")


def _list_after_heading(soup, heading):
    """Return the node right after the first <p> whose text matches *heading*.

    The node is returned only when it contains at least one <li>; otherwise
    (no matching paragraph, no following sibling, a sibling that is plain
    text, or a sibling without list items) None is returned so the caller
    can fall back to something else instead of crashing.
    """
    paragraphs = soup.find_all("p", string=heading)
    if not paragraphs:
        return None
    sibling = paragraphs[0].next_sibling
    # next_sibling may be None or a text node, neither of which can be
    # searched for <li> elements; treat both as "no list found".
    find_all = getattr(sibling, "find_all", None)
    if find_all is None or not find_all('li'):
        return None
    return sibling


rhfeed = feedparser.parse(feedUrl)
for post in rhfeed.entries:
    postPubTime = time.strftime("%A, %B %d %Y", post.published_parsed)

    # Titles are split on ' - '; the second segment is used when there is
    # one, otherwise the whole title.
    podTitleArray = post.title.split(' - ')
    podTitle = podTitleArray[1] if len(podTitleArray) > 1 else podTitleArray[0]
    print("\n**" + postPubTime + " - " + podTitle + "**\n")

    # Newlines are removed before parsing -- presumably so that the element
    # directly after the heading paragraph is the list itself rather than a
    # whitespace text node (TODO confirm against real feed content).
    cleanPost = post.summary.replace('\n', '')
    soup = BeautifulSoup(cleanPost, 'html5lib')

    # Look for a paragraph announcing the links, followed by a node holding
    # list items. This is a fragile heuristic, but it has worked so far.
    linksList = _list_after_heading(soup, LINKS_HEADING)
    if linksList is not None:
        print(html2text.html2text(str(linksList)))
    else:
        # Fallback: if the summary contains exactly one <ul>, assume it is
        # the links list.
        uls = soup.find_all("ul")
        if len(uls) == 1:
            print(html2text.html2text(str(uls[0])))
        else:
            # Backslash is escaped so the printed text is unchanged while
            # avoiding an invalid "\_" escape sequence in the literal.
            print("No show links for this episode ¯\\_(ツ)_/¯\n")

    if SHOW_SPONSORS:
        sponsorsList = _list_after_heading(soup, SPONSORS_HEADING)
        if sponsorsList is not None:
            print("**Sponsors:**\n")
            print(html2text.html2text(str(sponsorsList)))

# Footer lines, currently disabled:
#print("[Subscribe to the ad-free Premium Feed inside your podcast app here!](https://kimberlite.fm/ridehome/)\n")
#print("[All show links](https://pberry.github.io/ridehome/all-links.html)")