ytdl-org · Kasperdilday · Mar 20, 2019 · Mar 20, 2019 · dstftw · Apr 6, 2019
diff --git a/youtube_dl/extractor/youporn.py b/youtube_dl/extractor/youporn.py
@@ -57,6 +57,20 @@ class YouPornIE(InfoExtractor):
         'params': {
             'skip_download': True,
         },
+    }, {
+        # videoUrl has a different structure here: the file extension is part of the path
+        'url': 'https://www.youporn.com/watch/13922959/femdom-principal/',
+        'info_dict': {
+            'id': '13822959',
+            'display_id': 'femdom-principal',
+            'ext': 'mp4',
+            'format': '720p-4000k - 720p',
+            'height': 720,
+            'tbr': 4000
+        },
+        'params': {
+            'skip_download': True,
+        },
     }]
 
     def _real_extract(self, url):
@@ -119,8 +133,9 @@ def _real_extract(self, url):
             # Video URL's path looks like this:
             #  /201012/17/505835/720p_1500k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
             #  /201012/17/505835/vl_240p_240k_505835/YouPorn%20-%20Sex%20Ed%20Is%20It%20Safe%20To%20Masturbate%20Daily.mp4
+            #  /videos/201703/11/109285532/720P_4000K_109285532.mp4?rate=248k&burst=1400k&validfrom=1553107800&validto=1553122200&hash=NzBS4CUWB2RpgA9thDRS0Ouw5PM%3D
             # We will benefit from it by extracting some metadata
-            mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+/', video_url)
+            mobj = re.search(r'(?P<height>\d{3,4})[pP]_(?P<bitrate>\d+)[kK]_\d+', video_url)
             if mobj:
                 height = int(mobj.group('height'))
                 bitrate = int(mobj.group('bitrate'))
@@ -145,9 +160,8 @@ def _real_extract(self, url):
             r'(?s)<div[^>]+class=["\']submitByLink["\'][^>]*>(.+?)</div>',
             webpage, 'uploader', fatal=False)
         upload_date = unified_strdate(self._html_search_regex(
-            [r'Date\s+[Aa]dded:\s*<span>([^<]+)',
-             r'(?s)<div[^>]+class=["\']videoInfo(?:Date|Time)["\'][^>]*>(.+?)</div>'],
-            webpage, 'upload date', fatal=False))
+                r'<div[^>]+class=["\']video-uploaded["\'][^>]*>[^<]+<span>([^<]+)</span></div>',
+                webpage, 'upload date', fatal=False))
 
         age_limit = self._rta_search(webpage)
 
@@ -158,9 +172,7 @@ def _real_extract(self, url):
         view_count = str_to_int(self._search_regex(
             r'(?s)<div[^>]+class=(["\']).*?\bvideoInfoViews\b.*?\1[^>]*>.*?(?P<count>[\d,.]+)<',
             webpage, 'view count', fatal=False, group='count'))
-        comment_count = str_to_int(self._search_regex(
-            r'>All [Cc]omments? \(([\d,.]+)\)',
-            webpage, 'comment count', fatal=False))
+        comment_count = len(re.findall(r'<div[^>]+class=([\"\']).*?videoComment\b.*?\1', webpage))
 
         def extract_tag_box(regex, title):
             tag_box = self._search_regex(regex, webpage, title, default=None)