-
Notifications
You must be signed in to change notification settings - Fork 8
/
cpasbien.py
120 lines (103 loc) · 3.93 KB
/
cpasbien.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# -*- coding: utf-8 -*-
#VERSION: 1.1
#AUTHOR: Davy39 <[email protected]>
#CONTRIBUTORS: Simon <[email protected]>
# Copyleft
from __future__ import print_function
import re
try:
# python2
from HTMLParser import HTMLParser
except ImportError:
# python3
from html.parser import HTMLParser
from helpers import download_file, retrieve_url
from novaprinter import prettyPrinter
class cpasbien(object):
    """Search plugin that scrapes result listings from the Cpasbien site.

    ``search`` downloads result pages with ``retrieve_url`` and feeds them
    to ``SimpleHTMLParser``; every complete row found is emitted through
    ``prettyPrinter``.
    """

    # Base URL of the site; every request and result link is built from it.
    url = "http://www.cpasbien.cm"
    # Display name of this engine.
    name = "Cpasbien (french)"
    # Maps a category name to the URL path segment(s) queried for it.
    supported_categories = {
        "all": [""],
        "books": ["ebook/"],
        "movies": ["films/"],
        "tv": ["series/"],
        "music": ["musique/"],
        "software": ["logiciels/"],
        "games": ["jeux-pc/", "jeux-consoles/"]
    }

    def __init__(self):
        """Create the engine with an empty result list and a parser."""
        self.results = []
        self.parser = self.SimpleHTMLParser(self.results, self.url)

    def download_torrent(self, url):
        """Print whatever ``download_file`` returns for *url*."""
        print(download_file(url))

    class SimpleHTMLParser(HTMLParser):
        """Extract result rows from a search page.

        A row starts at an ``<a>`` whose href points below
        ``<url>/dl-torrent/`` and is expected to be followed by three
        ``<div>`` elements whose classes contain ``poid``, ``up`` and
        ``down`` (size, seeds, leech), in that order.
        """

        # Expected class of the n-th <div> following a result link.
        _DIV_CLASSES = {1: 'poid', 2: 'up', 3: 'down'}

        def __init__(self, results, url, *args):
            """Store the shared *results* list and the site base *url*.

            Extra positional arguments are accepted and ignored.
            """
            # Explicit base call: HTMLParser is an old-style class on
            # Python 2, so super() cannot be used here.
            HTMLParser.__init__(self)
            self.url = url
            # None: not inside a row. 0: link seen. 1..3: index of the
            # current <div> of the row.
            self.div_counter = None
            self.current_item = None
            self.results = results

        def handle_starttag(self, tag, attr):
            """Dispatch ``<a>`` and ``<div>`` tags to their handlers."""
            if tag in ('a', 'div'):
                getattr(self, 'start_' + tag)(attr)

        def start_a(self, attr):
            """Begin a new row when the link targets a torrent page."""
            # A valueless attribute (<a href>) is reported as None.
            href = dict(attr).get('href') or ''
            if not href.startswith(self.url + '/dl-torrent/'):
                return
            # The download link reuses the page's file name with a
            # .torrent extension.
            fname = re.sub(
                r'\.html$', '.torrent', href.split('/')[-1],
                flags=re.IGNORECASE
            )
            self.current_item = {
                "desc_link": href,
                "link": self.url + '/telechargement/' + fname,
            }
            self.div_counter = 0

        def start_div(self, attr):
            """Advance to the next column, or abort the row on mismatch."""
            if self.div_counter is None:
                return
            self.div_counter += 1
            # .get(): a <div> beyond the third has no expected class and
            # must abort the row instead of raising KeyError.
            expected = self._DIV_CLASSES.get(self.div_counter)
            # A valueless attribute (<div class>) is reported as None.
            classes = dict(attr).get('class') or ''
            if expected is None or expected not in classes:
                self.div_counter = None
                self.current_item = None

        def handle_data(self, data):
            """Store text in the field selected by ``div_counter``.

            Once the leech value (third div) is stored, the row is emitted
            through ``prettyPrinter`` if it has a name and a size, and the
            parser state is reset either way.
            """
            data = data.strip()
            if not data:
                return
            if self.div_counter == 0:
                self.current_item['name'] = data
            elif self.div_counter == 1:
                self.current_item['size'] = unit_fr2en(data)
            elif self.div_counter == 2:
                self.current_item['seeds'] = data
            elif self.div_counter == 3:
                self.current_item['leech'] = data
                # End of current_item, final validation: every required
                # key must be present, not just one of them.
                required_keys = ('name', 'size')
                if all(key in self.current_item for key in required_keys):
                    self.current_item['engine_url'] = self.url
                    prettyPrinter(self.current_item)
                    # Placeholder entry: search() only checks whether the
                    # list is empty.
                    self.results.append("a")
                self.current_item = None
                self.div_counter = None

    def search(self, what, cat="all"):
        """Fetch up to 35 result pages for *what* in category *cat*.

        Each page is requested once per sub-category of *cat*. Paging stops
        at the first page that yields no result.

        Raises:
            KeyError: if *cat* is not a key of ``supported_categories``.
        """
        for page in range(35):
            results = []
            parser = self.SimpleHTMLParser(results, self.url)
            for subcat in self.supported_categories[cat]:
                data = retrieve_url(
                    '{}/recherche/{}{}/page-{},trie-seeds-d'
                    .format(self.url, subcat, what, page)
                )
                parser.feed(data)
            parser.close()
            if not results:
                break
def unit_fr2en(size):
    """Rewrite French byte-unit suffixes in *size* to their English form.

    Any of the letters K, M, G, T or P (either case) followed by an "o"
    (either case) keeps its prefix letter and gets "B" in place of the
    "o", e.g. "Go" becomes "GB".
    """
    french_unit = re.compile(r'([KMGTP])o', re.IGNORECASE)

    def _to_english(found):
        # Keep the prefix letter exactly as written in the input.
        return found.group(1) + 'B'

    return french_unit.sub(_to_english, size)