Skip to content

Commit

Permalink
Try helpers.py's retrieve_url first, then fall back to urllib if it doesn't support POST
Browse files Browse the repository at this point in the history
  • Loading branch information
ducalex committed Aug 13, 2024
1 parent 30e878c commit 365cdb1
Showing 1 changed file with 18 additions and 37 deletions.
55 changes: 18 additions & 37 deletions nova3/engines/eztv.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,47 +2,13 @@
# AUTHORS: nindogo
# CONTRIBUTORS: Diego de las Heras ([email protected])

import io
import gzip
import urllib.error
import urllib.parse
import urllib.request
from html.parser import HTMLParser

from novaprinter import prettyPrinter
from helpers import htmlentitydecode

# Some sites blocks default python User-agent
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:125.0) Gecko/20100101 Firefox/125.0'}

# We must implement our own retrieve_url because helpers.py versions prior to 1.49 did not
# support POST requests. That version is taken from helpers.py 1.45
def retrieve_url(url, data=None):
    """Fetch *url* and return the response body as a decoded string.

    data: optional bytes payload; when given, the request is a POST.
    Returns "" (after printing a message) on any connection error.
    """
    request = urllib.request.Request(url, data, headers)
    try:
        response = urllib.request.urlopen(request)
    except urllib.error.URLError as errno:
        # print() with two args joins them with a single space, matching
        # the original "Connection error: <reason>" message exactly.
        print("Connection error:", str(errno.reason))
        return ""
    body = response.read()
    # Sniff for the gzip magic number and decompress if present.
    if body[:2] == b'\x1f\x8b':
        body = gzip.decompress(body)
    # Default to UTF-8; honour an explicit charset= in Content-Type when
    # it parses cleanly (any parse error leaves the UTF-8 default).
    charset = 'utf-8'
    try:
        _, charset = response.info()['Content-Type'].split('charset=')
    except Exception:
        pass
    text = body.decode(charset, 'replace')
    return htmlentitydecode(text)
from helpers import retrieve_url


class eztv(object):
Expand Down Expand Up @@ -96,9 +62,24 @@ def handle_endtag(self, tag):
prettyPrinter(self.current_item)
self.in_table_row = False

def do_query(self, what):
    """Run the eztv search for *what* and return the result page HTML.

    Tries helpers.retrieve_url with a POST body first; helpers.py versions
    before 1.49 have no request_data parameter, in which case the resulting
    TypeError triggers a plain urllib fallback. Returns "" on connection error.
    """
    url = f"{self.url}/search/{what.replace('%20', '-')}"
    data = b"layout=def_wlinks"
    try:
        return retrieve_url(url, request_data=data)
    except TypeError:
        # Older versions of retrieve_url did not support request_data/POST,
        # so we must do the request ourselves...
        # Imported locally: this module no longer imports urllib at the top,
        # so without these the fallback would raise NameError instead of
        # performing the request.
        import urllib.error
        import urllib.request
        user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:125.0) Gecko/20100101 Firefox/125.0'
        req = urllib.request.Request(url, data, {'User-Agent': user_agent})
        try:
            return urllib.request.urlopen(req).read().decode('utf-8')
        except urllib.error.URLError as errno:
            print(f"Connection error: {errno.reason}")
            return ""

def search(self, what, cat='all'):
query = self.url + '/search/' + what.replace('%20', '-')
eztv_html = retrieve_url(query, b"layout=def_wlinks")
eztv_html = self.do_query(what)

eztv_parser = self.MyHtmlParser(self.url)
eztv_parser.feed(eztv_html)
Expand Down

0 comments on commit 365cdb1

Please sign in to comment.