forked from matiassingers/spotify-pricing
-
Notifications
You must be signed in to change notification settings - Fork 1
/
crawler.php
143 lines (120 loc) · 5.37 KB
/
crawler.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
<?php
include('lib/selector.inc');
// show errors
ini_set('error_reporting', E_ALL);
ini_set('display_errors', 1);
ini_set('display_startup_errors', 1);

/**
 * Read a JSON document from a local path or URL and decode it to an array.
 *
 * @param string $source Path or URL handed to file_get_contents().
 * @return array|null Decoded associative array, or null when the source
 *                    cannot be read or does not decode to an array.
 */
function crawlerLoadJson($source) {
    $raw = file_get_contents($source);
    if ($raw === false) {
        return null;
    }
    $decoded = json_decode($raw, true);
    return is_array($decoded) ? $decoded : null;
}

// get database with countries names; nothing can be built without it
$countrycodes = crawlerLoadJson(__DIR__.'/data/countries.json');
if ($countrycodes === null) {
    echo 'Cannot load data/countries.json';
    exit(1);
}

// load existing prices; a missing or broken snapshot just means nothing can be restored
$summary = crawlerLoadJson(__DIR__.'/data/summary.json');
if ($summary === null) {
    echo 'Cannot load data/summary.json, continuing without stored prices';
    $summary = [];
}

$token = ''; // App ID for Open Exchange Rates

// get exchange rates; every converted price depends on the 'rates' table,
// so stop here instead of dividing by a missing rate later on
$exchange = crawlerLoadJson('https://openexchangerates.org/api/latest.json?app_id='.urlencode($token));
if ($exchange === null || !isset($exchange['rates']) || !is_array($exchange['rates'])) {
    echo 'Cannot load exchange rates';
    exit(1);
}

$data = '';
/**
 * Download a URL with cURL and return the response body.
 *
 * Redirects are followed, a fresh connection is forced and a desktop browser
 * User-Agent is sent.
 *
 * @param string $url Address to fetch.
 * @return string|false Response body, or false when the handle cannot be
 *                      created, the transfer fails, or the final HTTP status
 *                      is 400 or above.
 */
function getHtml($url) {
    $ch = curl_init($url);
    if ($ch === false) {
        return false;
    }
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_FRESH_CONNECT, true);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36');
    $response = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    // An error page (404, 500, ...) still has a body; report it as a failure
    // so callers do not try to parse it as the page they asked for.
    if ($response === false || $httpCode >= 400) {
        return false;
    }
    return $response;
}
/**
 * Fetch a premium page and extract the plan price as a plain numeric string.
 *
 * @param string $url Page to download.
 * @param string $rel Upper-case two-letter market code; selects the
 *                    market-specific parsing rules below.
 * @return string|null Cleaned price text, or null when the page cannot be
 *                     downloaded ('Bad link' is printed) or no price element
 *                     is found.
 */
function getPrice($url, $rel) {
    // Some countries have daily plans, so we take second block
    $dailyPlanMarkets = ['ID', 'VN', 'BD', 'PH', 'TH', 'MY', 'KE', 'EG', 'CO', 'PK', 'LK', 'IN', 'ZA', 'SA', 'TR'];
    // Markets where the dot is a thousands separator and is dropped
    $noDecimalMarkets = ['TZ', 'UG', 'KR', 'ID', 'VN', 'IQ'];

    $response = getHtml($url);
    sleep(1); // timeout because of anti ddos; runs after every request, not only failed ones

    if ($response === false) {
        echo 'Bad link';
        return null;
    }

    $dom = new SelectorDOM($response);
    $plans = $dom->select('div[data-current-plan-text]');

    $price = null;
    if (in_array($rel, $dailyPlanMarkets, true) && isset($plans[1]['children'][2]['text'])) {
        $price = $plans[1]['children'][2]['text'];
    } elseif (isset($plans[0]['children'][2]['text'])) { // Standard pages
        $price = $plans[0]['children'][2]['text'];
    } else { // Temporary fix for new markets with one tariff plan
        $headings = $dom->select('h2');
        if (isset($headings[0]['text'])) {
            $price = $headings[0]['text'];
        }
    }

    if ($price === null) {
        // No known price element on the page.
        return null;
    }

    // Normalise the decimal comma, keep only digits and dots, and drop
    // dots left over at either end.
    $price = str_replace(',', '.', $price);
    $price = preg_replace('/[^.0-9]/', '', $price);
    $price = trim($price, '.');

    if ($rel == 'CL' || $rel == 'CO') {
        // Remove the first "00" pair, then all remaining dots.
        $price = preg_replace('/00/', '', $price, 1);
        $price = str_replace('.', '', $price);
    }
    if (in_array($rel, $noDecimalMarkets, true)) {
        $price = str_replace('.', '', $price);
    }

    return $price;
}
/**
 * Remove rows whose value under $key has already been seen.
 *
 * The first row for each value wins. Kept rows are indexed by their position
 * in the input (0, 1, 2, ... counting every row), so the result may have gaps
 * in its keys.
 *
 * @param array  $array List of rows, each an array containing $key.
 * @param string $key   Name of the column that must be unique.
 * @return array Rows with a unique $key value; empty for non-array input.
 */
function unique_multidim_array($array, $key) {
    $unique = [];
    if (!is_array($array)) {
        return $unique;
    }

    $seen = [];
    $position = 0;
    foreach ($array as $row) {
        // Strict comparison: loose matching would merge distinct
        // numeric-looking strings such as '1e3' and '1000'.
        if (!in_array($row[$key], $seen, true)) {
            $seen[] = $row[$key];
            $unique[$position] = $row;
        }
        $position++;
    }
    return $unique;
}
/*
 * Build data/summary.json: one entry per country link found on the country
 * list page, holding its price and that price divided by the exchange rate of
 * the country's currency.
 */
/* Fix Russia block
$response = getHtml('https://www.spotify.com/us/select-your-country-region/'); */
$response = getHtml('https://spotify.wowgamr.ru/data/countries.html');
if ($response !== false) {
    $dom = new SelectorDOM($response);
    $links = $dom->select('.encore-light-theme li a'); // get list of countries
    $knownPrices = is_array($summary) ? $summary : [];
    $countries = []; // stays a valid array even when no link yields a row

    foreach ($links as $link) {
        if (!isset($link['attributes']['href'])) {
            continue;
        }
        $href = $link['attributes']['href'];
        $rel = strtoupper(substr($href, 1, 2)); // substr 'ca-fr' to 'CA'
        if ($rel == 'RU') {
            continue;
        }
        if (!isset($countrycodes[$rel]['currency'])) {
            // Unknown market: skip it rather than emit a row full of nulls.
            echo 'Unknown country '.$rel."\n";
            continue;
        }
        $country = $countrycodes[$rel];
        $currency = $country['currency'];

        $restoredPrice = false;
        /* Fix Russia block
        $price = getPrice('https://www.spotify.com'.$href.'premium/', $rel); */
        $price = null;
        // todo: duo prices
        // todo: family plan prices
        if ($price === null || $price === '') {
            // No live price: fall back to the first stored entry for this market.
            foreach ($knownPrices as $known) {
                if (isset($known['rel'], $known['price']) && $known['rel'] == $rel) {
                    $price = $known['price'];
                    $restoredPrice = true;
                    break;
                }
            }
        }
        if (!is_numeric($price)) {
            // One market without a usable price must not invalidate the whole file.
            echo 'No price for '.$rel."\n";
            continue;
        }
        $price = $price + 0; // numeric string -> int/float so it is written as a JSON number

        if (!isset($exchange['rates'][$currency])) {
            echo 'No exchange rate for '.$currency."\n";
            continue;
        }
        // NOTE(review): rounding the rate to two decimals before dividing loses
        // precision for low-valued rates; kept to preserve the existing output.
        $rate = round($exchange['rates'][$currency], 2);
        if ($rate <= 0) {
            // A rate that rounds to zero would be a division by zero.
            echo 'Unusable exchange rate for '.$currency."\n";
            continue;
        }

        // Key order and the 1/0 form of restoredPrice match the file format
        // written so far; f_price is always written as 0.
        $countries[] = [
            'title' => $country['title'],
            'rel' => $rel,
            'currency' => $currency,
            'countryCode' => $country['countryCode'],
            'region' => $country['region'],
            'restoredPrice' => (int) $restoredPrice,
            'price' => $price,
            'f_price' => 0,
            'convertedPrice' => round($price / $rate, 2),
        ];
    }

    $countries = unique_multidim_array($countries, 'rel');

    if (count($countries) === 0) {
        // Writing an empty list would wipe the stored prices used as fallback.
        echo 'No countries collected';
    } else {
        // json_encode escapes quotes and backslashes in titles; array_values
        // closes the key gaps left by de-duplication so a JSON array is written.
        $data = json_encode(array_values($countries), JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
        if ($data === false) {
            echo "JSON not valid";
        } elseif (file_put_contents(__DIR__.'/data/summary.json', $data, LOCK_EX) === false) {
            echo 'Cannot write data/summary.json';
        }
    }
} else {
    echo 'Bad link';
}
?>