-
Notifications
You must be signed in to change notification settings - Fork 1.7k
/
Copy pathResolveShortenedURL.py
399 lines (314 loc) · 14.6 KB
/
ResolveShortenedURL.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
import demistomock as demisto # noqa: F401
from CommonServerPython import * # noqa: F401
import urllib3
from abc import ABCMeta
from typing import NamedTuple, Type
from requests import Response
# Module-level side effect: silences urllib3 warnings for the whole process.
urllib3.disable_warnings() # Disable insecure warnings
# Service used by `main` when the `service` argument is not supplied.
DEFAULT_SERVICE = "unshorten.me"
# Fallback for the `redirect_limit` argument. Kept as a string because `main` passes it
# through `arg_to_number`; a limit of 0 is treated as "unlimited" by `hit_redirect_limit`.
DEFAULT_REDIRECT_LIMIT = "0"
class URLUnshorteningData(NamedTuple):
"""
A tuple containing data for unshortend URLs.
Attributes:
original_url (str): The original URL.
resolved_url (str): The resolved URL.
service_name (str): The name of the service used to resolve the URL.
redirect_history (list): A list of URLs that were redirected to get to the resolved URL.
raw_data (dict | list[dict] | None, optional): The raw data returned by the service. None if not available.
encountered_error (bool, optional): Whether an error was encountered while resolving the URL. Defaults to False.
api_usage (int | None, optional): The API usage count for the current IP. None if not relevant to the service.
api_rate_limit (int | None, optional): The maximum number of API calls allowed by the service
for a defined period of time. None if not relevant to the service.
"""
original_url: str
resolved_url: str
service_name: str
redirect_history: list[str]
raw_data: dict | list[dict] | None = None
encountered_error: bool = False
api_usage: int | None = None
api_rate_limit: int | None = None
def to_context_dict(self) -> dict:
"""
Converts the data to a dictionary that will be used as the context data.
Adds recursion data only if relevant.
Note:
We subtract 1 from RedirectCount because the original URL is included in the recursion history.
Returns:
dict: A dictionary containing the data in context format.
"""
data = {
"OriginalURL": self.original_url,
"ResolvedURL": self.resolved_url,
"ServiceName": self.service_name,
"RedirectCount": len(self.redirect_history) - 1,
"RedirectHistory": self.redirect_history,
"EncounteredError": self.encountered_error,
}
if self.api_usage is not None:
data["APIUsageCount"] = self.api_usage
if self.api_rate_limit is not None:
data["APIRateLimit"] = self.api_rate_limit
return data
def to_hr_dict(self) -> dict:
"""
Converts the data to a dictionary that will be used as the human-readable data.
Returns:
dict: A dictionary containing the data in human-readable format.
"""
api_usage_hr: str | None = None
if self.api_usage is not None:
api_usage_hr = f"{self.api_usage}"
if self.api_rate_limit is not None:
api_usage_hr += f"/{self.api_rate_limit}"
return {
"Original URL": self.original_url,
"Resolved URL": self.resolved_url,
"Service Used": self.service_name,
"Redirect History": self.redirect_history if len(self.redirect_history) > 1 else None,
"API Usage Count": api_usage_hr if api_usage_hr is not None else None,
}
class URLUnshortingService(BaseClient, metaclass=ABCMeta):
    """
    Abstract base for every URL unshortening backend.

    Note:
        To add a new service, create a direct subclass of this class and implement `resolve_url`.
        Its `service_name` class attribute must match the name listed under `service` in the YAML file;
        `find_matching_service` looks services up by that attribute, so no further registration is needed.

    Attributes:
        base_url (str): The base URL of the service that will be used for sending requests.
        service_name (str): The name of the service.
        service_rate_limit (int | None): Rate limit advertised for the service, None when not set.
        redirect_limit (int): The maximum number of redirects to follow. 0 means no limit
            (a `redirect_limit` of None passed to the constructor is stored as 0).
    """
    base_url: str
    service_name: str
    service_rate_limit: int | None = None

    def __init__(self, redirect_limit: int | None = None, **kwargs):
        """
        Args:
            redirect_limit (int | None): Maximum number of redirects to follow; None or 0 for unlimited.
            **kwargs: Forwarded unchanged to `BaseClient.__init__`.
        """
        super().__init__(base_url=self.base_url, **kwargs)
        self.redirect_limit = 0 if redirect_limit is None else redirect_limit

    def hit_redirect_limit(self, redirect_history: list) -> bool:
        """
        Tells whether the redirect history has reached the configured limit.

        Args:
            redirect_history (list): The redirects followed so far.

        Returns:
            bool: False when the limit is 0 (unlimited); otherwise True once the history
                holds at least `redirect_limit` entries.
        """
        limit = self.redirect_limit
        return limit != 0 and len(redirect_history) >= limit

    @staticmethod
    def find_matching_service(service_name: str) -> Type["URLUnshortingService"]:
        """
        Looks up a service class by name among the direct subclasses of this class.

        Args:
            service_name (str): The service name to find, compared case-insensitively
                against each subclass's `service_name` attribute.

        Returns:
            Type[URLUnshortingService]: The first subclass whose `service_name` matches.

        Raises:
            ValueError: If no subclass matches.
        """
        match = next(
            (
                candidate
                for candidate in URLUnshortingService.__subclasses__()
                if candidate.service_name.casefold() == service_name.casefold()
            ),
            None,
        )

        if match is None:
            raise ValueError(f"No matching service was found for: \"{service_name}\".")

        return match

    @abstractmethod
    def resolve_url(self, url: str) -> URLUnshorteningData:  # pragma: no cover
        """
        Resolve a shortened URL. Must be implemented by every concrete service.

        Args:
            url (str): The URL to resolve.

        Returns:
            URLUnshorteningData: A NamedTuple containing the data for the resolved URL.
        """
class LongurlInService(URLUnshortingService):
    """
    A class for unshortening URLs using longurl.in.

    Note:
        If the URL is invalid, the API returns {"status": "Failed", "message": "url is invalid"} with a 404 status_code.
    """
    base_url = "https://longurl.in/api/expand-url"
    service_name = "longurl.in"

    def _expand(self, url: str) -> dict:
        """Send a single expansion request for `url` and return the decoded JSON body."""
        return self._http_request(
            method="POST",
            full_url=self.base_url,
            resp_type="json",
            data={"shortURL": url},
            error_handler=lambda _: None,  # Disable exception raising if API returns a 404
        )

    @staticmethod
    def _next_hop(response: dict) -> str | None:
        """
        Extract the expanded URL from an API response.

        Returns:
            str | None: The first element of `data` when `status` is "OK" and `data` is non-empty,
                otherwise None. A missing, null or empty `data` value is treated as "nothing to follow".
        """
        if response.get("status") != "OK":
            return None

        # `.get("data", [])` would still return None for an explicit `"data": null`,
        # so normalise every falsy value to an empty list before measuring it.
        data = response.get("data") or []
        return data[0] if len(data) > 0 else None

    def resolve_url(self, url: str) -> URLUnshorteningData:
        """
        Resolve `url` by repeatedly asking longurl.in to expand the latest result.

        The loop stops when the API stops returning a further URL or when the redirect limit is hit.

        Args:
            url (str): The URL to resolve.

        Returns:
            URLUnshorteningData: The resolved URL, the redirect history (original URL first), every raw
                API response, and an error flag set when the last response's status is not "OK"
                and the redirect limit was not the reason for stopping.
        """
        original_url: str = url

        response: dict = self._expand(url)
        raw_data: list[dict] = [response]
        redirect_history: list[str] = []

        while not self.hit_redirect_limit(redirect_history):
            next_url = self._next_hop(response)

            if next_url is None:
                break

            url = next_url
            response = self._expand(url)
            raw_data.append(response)
            redirect_history.append(url)

        # Stopped because of an error, without hitting `redirect_limit` (which would mean we don't care about the error)
        encountered_error: bool = (
            response.get("status") != "OK" and not self.hit_redirect_limit(redirect_history)
        )

        return URLUnshorteningData(
            original_url=original_url,
            resolved_url=url,
            service_name=self.service_name,
            redirect_history=[original_url] + redirect_history,
            raw_data=raw_data,
            encountered_error=encountered_error,
        )
class UnshortenMeSservice(URLUnshortingService):
    """Unshortens URLs through the unshorten.me JSON API."""
    base_url = "https://unshorten.me/json/"
    service_name = "unshorten.me"
    service_rate_limit = 10

    def _query(self, target: str) -> dict:
        """Issue one GET for `target` (appended to `base_url`) and return the decoded JSON body."""
        return self._http_request(
            method="GET",
            url_suffix=target,
            resp_type="json",
        )

    def resolve_url(self, url: str) -> URLUnshorteningData:
        """
        Resolve `url` by feeding each `resolved_url` answer back into the service.

        The loop ends when the service returns the URL it was just asked about, when a response
        is not marked successful, or when the redirect limit is reached.

        Args:
            url (str): The URL to resolve.

        Returns:
            URLUnshorteningData: The resolved URL, the redirect history (original URL first), all raw
                responses, the error flag, the last non-empty `usage_count` reported by the
                service (0 if none was reported) and the class's rate limit.
        """
        original_url: str = url

        response: dict = self._query(url)
        raw_data: list[dict] = [response]
        usage_count = response.get("usage_count", 0)

        hops: list[str] = []
        last_asked: str | None = None

        while True:
            if last_asked == url or not response.get("success") or self.hit_redirect_limit(hops):
                break

            last_asked, url = url, response.get("resolved_url", "")
            response = self._query(url)
            raw_data.append(response)
            hops.append(url)

            # Keep the previous figure when this response carries no (or a zero) usage count.
            usage_count = response.get("usage_count") or usage_count

        # An unsuccessful final response counts as an error only if the redirect limit did not stop us first.
        # This must be evaluated before the history is trimmed below.
        encountered_error: bool = not response.get("success") and not self.hit_redirect_limit(hops)

        # Drop the trailing entry when it merely repeats the previous hop,
        # or when the single hop recorded is the original URL itself.
        repeats_previous_hop = len(hops) >= 2 and hops[-1] == hops[-2]
        echoes_original = len(hops) == 1 and hops[0] == original_url

        if repeats_previous_hop or echoes_original:
            hops.pop()

        return URLUnshorteningData(
            original_url=original_url,
            resolved_url=url,
            service_name=self.service_name,
            redirect_history=[original_url] + hops,
            raw_data=raw_data,
            encountered_error=encountered_error,
            api_usage=usage_count,
            api_rate_limit=self.service_rate_limit,
        )
class BuiltInShortener(URLUnshortingService):
    """A class for unshortening URLs using Python requests."""
    base_url = ""
    service_name = "Built-In"

    def _fetch(self, url: str) -> "Response":
        """Send a single GET to `url` without following redirects and return the raw response."""
        return self._http_request(
            method="GET",
            full_url=url,
            resp_type="response",
            allow_redirects=False,
        )

    def resolve_url(self, url: str) -> URLUnshorteningData:
        """
        Resolve `url` by following HTTP redirects one hop at a time.

        Args:
            url (str): The URL to resolve.

        Returns:
            URLUnshorteningData: The last URL reached, the redirect history (original URL first) and an
                error flag. The flag is set when a request fails, unless the failure happens on the hop
                that reached the redirect limit. No raw data is recorded for this service.
        """
        # Imported locally so this block does not depend on what the module-level imports provide.
        from urllib.parse import urljoin

        original_url = url
        redirect_history: list[str] = []
        encountered_error = False
        response: "Response | None"

        try:
            response = self._fetch(url)

        except Exception:
            # Deliberate best effort: a failed request is reported through the error flag, not raised.
            encountered_error = True
            response = None

        while response is not None and response.is_redirect \
                and not self.hit_redirect_limit(redirect_history):
            # `Location` may be a relative reference; resolve it against the URL that was just
            # requested so the next request (and the reported resolved URL) is absolute.
            url = urljoin(url, response.headers["location"])
            redirect_history.append(url)

            try:
                response = self._fetch(url)

            except Exception:
                # A failure on the hop that reached the limit is ignored: we were going to stop anyway.
                if not self.hit_redirect_limit(redirect_history):
                    encountered_error = True

                break

        return URLUnshorteningData(
            original_url=original_url,
            resolved_url=url,
            service_name=self.service_name,
            redirect_history=[original_url] + redirect_history,
            encountered_error=encountered_error,
        )
def unshorten_url(service_name: str, url: str, redirect_limit: int, use_system_proxy: bool = False,
                  session_verify: bool = True) -> CommandResults:
    """
    Resolve a shortened URL with the requested service and package the outcome as command results.

    Args:
        service_name (str): The service to use for unshortening.
        url (str): The URL to un-shorten.
        redirect_limit (int): A maximum number of redirects to follow. Use 0 for unlimited.
        use_system_proxy (bool): Whether to use the system proxy.
        session_verify (bool): Whether to verify the SSL certificate of the request.

    Returns:
        CommandResults: Context output under the `ResolveShortenedURL` prefix, a markdown table
            (preceded by a warning when an error was encountered), a URL indicator for the resolved
            URL with a DBot score of NONE, and the raw service data.

    Raises:
        ValueError: Propagated from `find_matching_service` when `service_name` is unknown.
    """
    resolver_class = URLUnshortingService.find_matching_service(service_name=service_name)
    resolver = resolver_class(
        redirect_limit=redirect_limit,
        proxy=use_system_proxy,
        verify=session_verify,
    )
    resolution = resolver.resolve_url(url=url)

    # Shown above the table when the chain could not be followed to its end.
    warning = ""
    if resolution.encountered_error:
        warning = (
            "There was an error while attempting to unshorten the final URL in the redirect chain.\n"
            "It is possible that the unshortening process was not fully completed.\n\n"
        )

    hr_row = resolution.to_hr_dict()
    results_table = tableToMarkdown(
        name="URL Unshortening Results",
        t=hr_row,
        headers=list(hr_row.keys()),
        removeNull=True,
    )

    dbot_score = Common.DBotScore(
        indicator=resolution.resolved_url,
        indicator_type=DBotScoreType.URL,
        integration_name="ResolveShortenedURL",
        score=Common.DBotScore.NONE,
    )
    url_indicator = Common.URL(url=resolution.resolved_url, dbot_score=dbot_score)

    return CommandResults(
        outputs_prefix="ResolveShortenedURL",
        outputs_key_field="OriginalURL",
        outputs=resolution.to_context_dict(),
        readable_output=warning + results_table,
        indicator=url_indicator,
        raw_response=resolution.raw_data,
    )
def main():  # pragma: no cover
    """
    Script entry point: read the command arguments, resolve the URL and return the results.

    Arguments read from `demisto.args()`:
        url: The URL to resolve (required).
        service: The unshortening service name. Defaults to `DEFAULT_SERVICE`.
        use_system_proxy: Whether to use the system proxy. Defaults to "False".
        redirect_limit: Maximum number of redirects, 0 for unlimited. Defaults to `DEFAULT_REDIRECT_LIMIT`.
        insecure: When true, SSL certificate verification is disabled. Defaults to "False".

    Any exception is reported through `return_error` instead of being raised.
    """
    args = demisto.args()

    try:
        url: str = args["url"]
        service: str = args.get("service", DEFAULT_SERVICE)
        use_system_proxy = argToBoolean(args.get("use_system_proxy", "False"))
        redirect_limit = arg_to_number(args.get("redirect_limit", DEFAULT_REDIRECT_LIMIT))

        # `arg_to_number` may hand back None, and it accepts negative numbers. A negative limit would
        # make `hit_redirect_limit` true from the start and silently skip resolution, so reject both.
        if redirect_limit is None or redirect_limit < 0:
            raise ValueError("'redirect_limit' must be a natural number.")

        session_verify = not argToBoolean(args.get("insecure", "False"))

        result = unshorten_url(service_name=service,
                               url=url,
                               use_system_proxy=use_system_proxy,
                               redirect_limit=redirect_limit,
                               session_verify=session_verify)

        return_results(result)

    except Exception as e:
        return_error(f"Error: {str(e)}")
# Run the script when the module is executed directly. The "__builtin__" / "builtins" names are
# presumably what the hosting script runner uses as `__name__` - TODO confirm against the platform docs.
if __name__ in ("__main__", "__builtin__", "builtins"):  # pragma: no cover
    main()