-
Notifications
You must be signed in to change notification settings - Fork 0
/
playstore_scraper.py
93 lines (93 loc) · 4.16 KB
/
playstore_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import random
import os
from google_play_scraper import app, search
import time
from pymongo import MongoClient
# --- MongoDB destination ----------------------------------------------------
mongo_uri = 'mongodb://localhost:27017/'
mongo_db = 'game_data111'
mongo_collection = 'games'

# --- Play Store search terms ------------------------------------------------
# Terms that close each of the three runs making up the search-term list.
_SHARED_TERMS = [
    'Educational-games',
    'Music-games',
    'Puzzle-games',
    'Role-Playing-games',
    'Simulation-games',
    'Sports-games',
    'Strategy-games',
    'Trivia-games',
    'Word-games',
    'Racing-games',
    'Classic-games',
]

# Full term sequence.  Several terms occur two or three times; the repeats are
# kept on purpose because they change how often random.choice(categories)
# returns a given term.
_BASE_TERMS = (
    [
        'Action-games',
        'Adventure-games',
        'Arcade-games',
        'Board-games',
        'Card-games',
        'Casino-games',
        'Casual-games',
    ]
    + _SHARED_TERMS
    + ['Board-games', 'Card-games', 'Casino-games']
    + _SHARED_TERMS
    + ['Casino-games']
    + _SHARED_TERMS
)

# Every term gets an extra trailing "s" or nothing, decided at random once,
# when the module is loaded (one random.choice call per entry, in list order).
categories = [term + random.choice(['', 's']) for term in _BASE_TERMS]

# --- Run limit --------------------------------------------------------------
# Number of documents a single scrape run inserts before it stops.
max_apps = 10000
def clean_file_name(name):
    """Return *name* reduced to alphanumerics, spaces, underscores and hyphens.

    Characters are kept in their original order; anything for which
    ``str.isalnum`` is false and that is not one of ``' '``, ``'_'`` or
    ``'-'`` is dropped.
    """
    extra_allowed = (' ', '_', '-')
    kept = []
    for ch in name:
        if not ch.isalnum() and ch not in extra_allowed:
            continue
        kept.append(ch)
    return "".join(kept)
def _build_game_record(app_info):
    """Build the MongoDB document stored for one Play Store app.

    Args:
        app_info: Mapping returned by ``google_play_scraper.app``.  Only
            ``title`` is mandatory; the other keys fall back to a placeholder
            when they are absent.

    Returns:
        dict: The document to insert, keyed by the field names used in the
        collection.

    Raises:
        KeyError: If ``app_info`` has no ``title`` entry.
    """
    return {
        'Game Title': app_info['title'],
        'Category Tags': app_info.get('genre', 'N/A'),
        'Game Version': app_info.get('version', 'N/A'),
        'Last Updated On': app_info.get('updated', 'N/A'),
        'Released On': app_info.get('released', 'N/A'),
        # NOTE(review): this value only says "the app is not free"; it is not
        # derived from an in-app-purchase flag.  Confirm that this is the
        # intended meaning of the field before relying on it.
        'In-App Purchase Required': not app_info.get('free', True),
        'Developer Website': app_info.get('developerWebsite', 'N/A'),
        'Phone Number': 'N/A',  # You may not have a direct phone number in this structure
        'Support Email': app_info.get('developerEmail', 'N/A'),
        'Address': app_info.get('developerAddress', 'N/A'),
    }


def _insert_if_new(collection, record):
    """Insert *record* unless a document with the same title already exists.

    Returns:
        bool: True when the record was inserted, False when it was skipped.
    """
    # The lookup has to use exactly the value stored under 'Game Title'.
    # Querying with a sanitised title never matches documents whose title
    # contains punctuation, so those games would be inserted again each time
    # they are encountered.
    if collection.find_one({'Game Title': record['Game Title']}) is not None:
        return False
    collection.insert_one(record)
    return True


def scrape_and_save_data():
    """Scrape Play Store game details and store them in MongoDB.

    Each pass runs ``len(categories)`` searches, every one for a term drawn at
    random from ``categories``, fetches the full details of each hit and
    inserts a document for every game whose title is not in the collection
    yet.  Passes are separated by a one-hour pause and repeat until
    ``max_apps`` documents have been inserted by this call.

    Returns:
        None.  The function returns as soon as the ``max_apps`` quota is
        reached, without waiting for another pause.

    Raises:
        Nothing is caught here: errors raised by ``search``/``app`` or by the
        MongoDB driver propagate to the caller.  The MongoDB client is closed
        in either case.
    """
    app_count = 0
    # App ids already handled during this call; the same app can be returned
    # by several searches and does not need to be fetched again.
    seen_app_ids = set()

    client = MongoClient(mongo_uri)
    try:
        collection = client[mongo_db][mongo_collection]
        while app_count < max_apps:
            for _ in range(len(categories)):
                for result in search(random.choice(categories)):
                    app_id = result['appId']
                    # A hit without an app id cannot be looked up.
                    if not app_id or app_id in seen_app_ids:
                        continue
                    seen_app_ids.add(app_id)

                    record = _build_game_record(app(app_id))
                    if _insert_if_new(collection, record):
                        app_count += 1
                        if app_count >= max_apps:
                            # Quota reached: stop at once instead of sleeping
                            # through one more pause.
                            return
            # Wait an hour before starting the next pass.
            time.sleep(3600)
    finally:
        client.close()
# Start a scrape run only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    scrape_and_save_data()