Skip to content

Commit

Permalink
Fix HDSpace pubdate parsing (#5111)
Browse files Browse the repository at this point in the history
* Added the new hd-space pubdate formats to the unittests.

* Make changes to parse_pubdate() and hdspace
so that they handle date-times like: yesterday at 12:00:00

* Fixed parsing.

* Removed unused import.

* Remove prints.

* Update generic_provider.py

No need for log.exception here.

* Update generic_provider.py

Clean up the log.

* Update test_generic_provider.py

calculate_date is not used anymore.

* Fixed comments.

* updated CHANGELOG.md

* Remove unused import
  • Loading branch information
p0psicles authored Sep 4, 2018
1 parent 60ff750 commit dfce238
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
- Fixed episode lookup with conflicting show IDs ([#4933](https://github.com/pymedusa/Medusa/pull/4933))
- Fixed error getting season scene exceptions on show page ([#4964](https://github.com/pymedusa/Medusa/pull/4964))
- Fixed testing email notification with TLS ([#4972](https://github.com/pymedusa/Medusa/pull/4972))
- Fixed hd-space provider parsing of pubdates like 'yesterday at 12:00:00' ([#5111](https://github.com/pymedusa/Medusa/pull/5111))
- Fixed apiv2 call hanging, when opening an anime show, that has malformed data on anidb (with anidb enabled) ([#4961](https://github.com/pymedusa/Medusa/pull/4961))

-----
Expand Down
15 changes: 14 additions & 1 deletion medusa/providers/generic_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,12 +574,25 @@ def parse_pubdate(pubdate, human_time=False, timezone=None, **kwargs):
matched_time = int(round(float(matched_time.strip())))

seconds = parse('{0} {1}'.format(matched_time, matched_granularity))
if seconds is None:
log.warning('Failed parsing human time: {0} {1}', matched_time, matched_granularity)
raise ValueError('Failed parsing human time: {0} {1}'.format(matched_time, matched_granularity))

return datetime.now(tz.tzlocal()) - timedelta(seconds=seconds)

if fromtimestamp:
dt = datetime.fromtimestamp(int(pubdate), tz=tz.gettz('UTC'))
else:
dt = parser.parse(pubdate, dayfirst=df, yearfirst=yf, fuzzy=True)
day_offset = 0
if 'yesterday at' in pubdate.lower() or 'today at' in pubdate.lower():
# Extract a time
time = re.search(r'(?P<time>[0-9:]+)', pubdate)
if time:
if 'yesterday' in pubdate:
day_offset = 1
pubdate = time.group('time').strip()

dt = parser.parse(pubdate, dayfirst=df, yearfirst=yf, fuzzy=True) - timedelta(days=day_offset)

# Always make UTC aware if naive
if dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None:
Expand Down
9 changes: 8 additions & 1 deletion medusa/providers/torrent/html/hdspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,14 @@ def parse(self, data, mode):
torrent_size = row.find('td', class_='lista222', attrs={'width': '100%'}).get_text()
size = convert_size(torrent_size) or -1

pubdate_raw = row.find_all('td', class_='lista', attrs={'align': 'center'})[3].get_text()
pubdate_td = row.find_all('td', class_='lista', attrs={'align': 'center'})[3]
pubdate_human_offset = pubdate_td.find('b')
if pubdate_human_offset:
time_search = re.search('([0-9:]+)', pubdate_td.get_text())
pubdate_raw = pubdate_human_offset.get_text() + ' at ' + time_search.group(1)
else:
pubdate_raw = pubdate_td.get_text()

pubdate = self.parse_pubdate(pubdate_raw)

item = {
Expand Down
12 changes: 11 additions & 1 deletion tests/providers/test_generic_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"""Provider test code for Generic Provider."""
from __future__ import unicode_literals

from datetime import date, datetime
from datetime import date, datetime, timedelta

from dateutil import tz

Expand Down Expand Up @@ -127,6 +127,16 @@
'timezone': 'US/Eastern',
'fromtimestamp': True
},
{ # p22: hd-space test human date like 'yesterday at 12:00:00'
'pubdate': 'yesterday at {0}'.format((datetime.now() - timedelta(minutes=10, seconds=25)).strftime('%H:%M:%S')),
'expected': datetime.now().replace(microsecond=0, tzinfo=tz.gettz('UTC')) - timedelta(days=1, minutes=10, seconds=25),
'human_time': False
},
{ # p23: hd-space test human date like 'today at 12:00:00'
'pubdate': 'today at {0}'.format((datetime.now() - timedelta(minutes=10, seconds=25)).strftime('%H:%M:%S')),
'expected': datetime.now().replace(microsecond=0, tzinfo=tz.gettz('UTC')) - timedelta(days=0, minutes=10, seconds=25),
'human_time': False
},
])
def test_parse_pubdate(p):
# Given
Expand Down

0 comments on commit dfce238

Please sign in to comment.