#! /usr/bin/env python3
# Instagram Scraper
# Coded by sc1341
# http://github.com/sc1341/InstagramOSINT
# I am not responsible for anything you do with this script
# This is the main script meant to be run from the command line
#
#
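# The scraper requests a public profile page and parses two embedded data
# sources: the og:description meta tag (follower/following/post counts) and
# the JSON blob Instagram ships in an inline script tag (full profile data).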
import argparse
import json
import os
import random
import string
import sys
import time

import requests
from bs4 import BeautifulSoup

from banner import banner


class colors:
    # ANSI escape sequences for colored terminal output
HEADER = '\033[95m'
OKBLUE = '\033[94m'
OKGREEN = '\033[92m'
WARNING = '\033[93m'
FAIL = '\033[91m'
ENDC = '\033[0m'
BOLD = '\033[1m'
UNDERLINE = '\033[4m'


class InstagramOSINT:
    def __init__(self, username, downloadPhotos):
self.useragents = ['Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0.1 Safari/602.2.14',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0']
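        # One of these is chosen at random for each request so the traffic
        # looks less like a single automated client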
self.username = username
# Make the directory that we are putting the files into
self.make_directory()
print(colors.OKGREEN + f"[*] Starting Scan on {self.username}" + colors.ENDC)
        # Fetch the profile page with the requests module
        r = requests.get(f'https://www.instagram.com/{self.username}',
                         headers={'User-Agent': random.choice(self.useragents)})
        soup = BeautifulSoup(r.text, 'html.parser')
        # Find the tags that hold the data we want to parse
        general_data = soup.find_all('meta', attrs={'property': 'og:description'})
        more_data = soup.find_all('script', attrs={'type': 'text/javascript'})
        description = soup.find('script', attrs={'type': 'application/ld+json'})
        # Try to parse the content -- if parsing fails, the page did not carry
        # the expected data (bad username, rate limiting, or a login wall)
        try:
            # Follower/following/post counts come from the og:description tag
            self.text = general_data[0].get('content').split()
            # This is the profile description data
            self.description = json.loads(description.get_text())
            # The target script reads "window._sharedData = {...};" -- slicing
            # off the first 21 characters and the trailing ';' leaves bare JSON
            self.profile_meta = json.loads(more_data[3].get_text()[21:].strip(';'))
        except (AttributeError, IndexError, KeyError, json.JSONDecodeError):
            print(colors.FAIL + f"Username {self.username} not found" + colors.ENDC)
            sys.exit(1)
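        # Illustrative shape of the parsed shared-data JSON -- only the keys
        # the code below actually reads are shown; the real blob carries more:
        # {"entry_data": {"ProfilePage": [{"graphql": {"user": {
        #     "username": ..., "biography": ..., "profile_pic_url_hd": ...,
        #     "is_private": ..., "edge_owner_to_timeline_media": {"edges": [...]}}}}]}}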
        # Shorthand for the deeply nested user object used throughout
        user = self.profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']
        self.profile_data = {"Username": user['username'],
                             "Profile name": self.description['name'],
                             "URL": self.description['mainEntityofPage']['@id'],
                             "Followers": self.text[0],
                             "Following": self.text[2],
                             "Posts": self.text[4],
                             "Bio": str(user['biography']),
                             "profile_pic_url": str(user['profile_pic_url_hd']),
                             "is_business_account": str(user['is_business_account']),
                             "connected_to_fb": str(user['connected_fb_page']),
                             "externalurl": str(user['external_url']),
                             "joined_recently": str(user['is_joined_recently']),
                             "business_category_name": str(user['business_category_name']),
                             "is_private": str(user['is_private']),
                             "is_verified": str(user['is_verified'])}
        self.save_data()
        # Scrape posts only when requested; scrape_posts() itself refuses
        # private profiles
        if downloadPhotos:
            self.scrape_posts()
        self.print_data()

    def scrape_posts(self):
        """Scrapes all posts and downloads a thumbnail of each
        :return: none
        :param: none
        """
        if self.profile_data['is_private'].lower() == 'true':
            print("[*] Private profile, cannot scrape photos!")
        else:
            print("[*] Getting Photos")
            posts = {}
            for index, post in enumerate(self.profile_meta['entry_data']['ProfilePage'][0]['graphql']['user']['edge_owner_to_timeline_media']['edges']):
                os.mkdir(str(index))
                # Posts without a caption have an empty edges list, which
                # raises IndexError on lookup
                try:
                    caption = str(post['node']['edge_media_to_caption']['edges'][0]['node']['text'])
                except IndexError:
                    caption = 'No Caption on this post'
                posts[index] = {"Caption": caption,
                                "Number of Comments": str(post['node']['edge_media_to_comment']['count']),
                                "Comments Disabled": str(post['node']['comments_disabled']),
                                "Taken At Timestamp": str(post['node']['taken_at_timestamp']),
                                "Number of Likes": str(post['node']['edge_liked_by']['count']),
                                "Location": str(post['node']['location']),
                                "Accessibility Caption": str(post['node']['accessibility_caption'])
                                }
                # Download the post's first thumbnail into the per-post
                # directory under a random uppercase filename
                filename = ''.join(random.choice(string.ascii_uppercase) for _ in range(random.randint(1, 9))) + '.jpg'
                # Delay the request times randomly (be nice to Instagram)
                time.sleep(random.randint(5, 10))
                r = requests.get(post['node']['thumbnail_resources'][0]['src'],
                                 headers={'User-Agent': random.choice(self.useragents)})
                with open(os.path.join(os.getcwd(), str(index), filename), 'wb') as f:
                    f.write(r.content)
                print("Got an Image")
            with open('posts.txt', 'w') as f:
                f.write(json.dumps(posts))
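            # posts.txt ends up as one JSON object keyed by post index, e.g.
            # (illustrative values):
            # {"0": {"Caption": "...", "Number of Comments": "4", ...}, "1": {...}}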

    def make_directory(self):
        """Makes the profile directory and changes the cwd to it
        :return: none
        """
        try:
            os.mkdir(self.username)
            os.chdir(self.username)
        except FileExistsError:
            # A previous scan already created this directory; append an
            # incrementing number (username1, username2, ...) until a free
            # name is found
            num = 0
            while True:
                num += 1
                try:
                    os.mkdir(self.username + str(num))
                    os.chdir(self.username + str(num))
                    break
                except FileExistsError:
                    pass

    def save_data(self):
        """Saves the data to the username directory
        :return: none
        :param: none
        """
        with open('data.txt', 'w') as f:
            f.write(json.dumps(self.profile_data))
        # Download the profile picture alongside the data file
        self.download_profile_picture()
        print(f"Saved data to directory {os.getcwd()}")

    def print_data(self):
        """Prints the data to the screen by iterating over the dict's keys and values
        :return: none
        :param: none
        """
        print(colors.HEADER + "---------------------------------------------" + colors.ENDC)
        print(colors.OKGREEN + f"Results: scan for {self.profile_data['Username']} on Instagram" + colors.ENDC)
for key, value in self.profile_data.items():
print(key + ': ' + value)

    def download_profile_picture(self):
        """Downloads the profile pic and saves it to the directory
        :return: none
        :param: none
        """
        time.sleep(1)
        r = requests.get(self.profile_data['profile_pic_url'],
                         headers={'User-Agent': random.choice(self.useragents)})
        with open("profile_pic.jpg", "wb") as f:
            f.write(r.content)


def parse_args():
    parser = argparse.ArgumentParser(description="Instagram OSINT tool")
    parser.add_argument("--username", help="profile username", required=True)
    parser.add_argument("--downloadPhotos", help="Downloads the user's photos if their account is public",
                        required=False, action='store_true')
    return parser.parse_args()
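

# Example invocation (the username below is just a placeholder):
#   python3 main.py --username someuser --downloadPhotos
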
def main():
    args = parse_args()
    print(colors.OKBLUE + banner + colors.ENDC)
    if args.username.strip() == '':
        print("Please enter the username")
        sys.exit(1)
    InstagramOSINT(username=args.username, downloadPhotos=args.downloadPhotos)


if __name__ == '__main__':
    main()