Skip to content

Commit

Permalink
Merge branch 'main' into fix-regex
Browse files Browse the repository at this point in the history
  • Loading branch information
HarHarLinks committed Oct 3, 2023
2 parents 255da46 + b9efcf1 commit 2885532
Show file tree
Hide file tree
Showing 11 changed files with 388 additions and 80 deletions.
45 changes: 45 additions & 0 deletions .github/workflows/release-maubot-plugin.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Packages the plugin as an .mbp archive and publishes a GitHub release on
# every push to main whose maubot.yaml version has not been released yet.
name: Release Maubot Plugin

on:
  push:
    branches:
      - main

# `gh release create` needs write access to repository contents; request it
# explicitly so the workflow does not depend on the repository-wide default.
permissions:
  contents: write

jobs:
  build:
    runs-on: ubuntu-latest
    env:
      # The gh CLI authenticates with this token.
      GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

    steps:
      - name: Check out code
        uses: actions/checkout@v3
        with:
          # Full history and tags are required by `git describe` / `git log` below.
          fetch-depth: 0

      - name: Extract version from maubot.yaml
        run: |
          # Only the top-level `version:` key counts, not any line mentioning "version".
          VERSION=$(grep -m1 '^version:' maubot.yaml | cut -d ':' -f 2 | xargs || true)
          if [ -z "$VERSION" ]; then
            echo "Could not read a version from maubot.yaml" >&2
            exit 1
          fi
          echo "VERSION=$VERSION" >> "$GITHUB_ENV"

      - name: Check if release already exists
        run: |
          if gh release view "$VERSION" > /dev/null 2>&1; then
            RELEASE_EXIST=true
          else
            RELEASE_EXIST=false
          fi
          echo "RELEASE_EXIST=$RELEASE_EXIST" >> "$GITHUB_ENV"

      - name: Generate Changelog
        if: env.RELEASE_EXIST == 'false'
        run: |
          echo "Changelog:" > CHANGELOG.md
          # Before the first tag exists `git describe` fails; fall back to the whole history
          # instead of silently producing an empty changelog.
          if LAST_TAG=$(git describe --tags --abbrev=0 2>/dev/null); then
            RANGE="$LAST_TAG..HEAD"
          else
            RANGE="HEAD"
          fi
          git log "$RANGE" --pretty=format:"- %h: %s" -- base-config.yaml maubot.yaml socialmediadownload.py instaloader >> CHANGELOG.md

      - name: Package Release
        if: env.RELEASE_EXIST == 'false'
        run: |
          zip -r package.zip base-config.yaml maubot.yaml socialmediadownload.py instaloader
          mv package.zip package.mbp

      - name: Create and Upload GitHub Release
        if: env.RELEASE_EXIST == 'false'
        run: |
          # Use the shell variable (exported via GITHUB_ENV) rather than interpolating
          # ${{ env.VERSION }} into the script text.
          gh release create "$VERSION" package.mbp -t "$VERSION" -F CHANGELOG.md
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,7 @@ dmypy.json

# Pyre type checker
.pyre/

# Maubot
*.zip
*.mbp
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# SocialMediaDownloadMaubot

[Maubot](https://github.com/maubot/maubot) plugin that downloads content from various social media websites given a link.
[Maubot](https://github.com/maubot/maubot) plugin that downloads content from various social media websites given a link and posts it to the Matrix room.

Currently supported:
- Reddit
Expand Down
2 changes: 1 addition & 1 deletion instaloader/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Download pictures (or videos) along with their captions and other metadata from Instagram."""


__version__ = '4.9.6'
__version__ = '4.10'


try:
Expand Down
25 changes: 21 additions & 4 deletions instaloader/instaloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ def download_title_pic(self, url: str, target: Union[str, Path], name_suffix: st
.. versionadded:: 4.3"""

http_response = self.context.get_raw(url)
date_object = None # type: Optional[datetime]
date_object: Optional[datetime] = None
if 'Last-Modified' in http_response.headers:
date_object = datetime.strptime(http_response.headers["Last-Modified"], '%a, %d %b %Y %H:%M:%S GMT')
date_object = date_object.replace(tzinfo=timezone.utc)
Expand Down Expand Up @@ -585,6 +585,23 @@ def download_hashtag_profilepic(self, hashtag: Hashtag) -> None:
.. versionadded:: 4.4"""
self.download_title_pic(hashtag.profile_pic_url, '#' + hashtag.name, 'profile_pic', None)

@_requires_login
def save_session(self) -> dict:
    """Export the internally stored :class:`requests.Session` as a :class:`dict`.

    The actual work is delegated to the context's ``save_session``.

    :return: The session data as a plain dictionary.
    :raises LoginRequiredException: If called without being logged in.

    .. versionadded:: 4.10
    """
    session_data = self.context.save_session()
    return session_data

def load_session(self, username: str, session_data: dict) -> None:
    """Restore the internally stored :class:`requests.Session` from a :class:`dict`.

    The actual work is delegated to the context's ``load_session``.

    :param username: Username that is recorded together with the session.
    :param session_data: Session data, e.g. as returned by :meth:`save_session`.

    .. versionadded:: 4.10
    """
    context = self.context
    context.load_session(username, session_data)

@_requires_login
def save_session_to_file(self, filename: Optional[str] = None) -> None:
"""Saves internally stored :class:`requests.Session` object.
Expand Down Expand Up @@ -712,7 +729,7 @@ def _all_already_downloaded(path_base, is_videos_enumerated) -> bool:
post.get_sidecar_nodes(self.slide_start, self.slide_end),
start=self.slide_start % post.mediacount + 1
):
suffix = str(edge_number) # type: Optional[str]
suffix: Optional[str] = str(edge_number)
if '{filename}' in self.filename_pattern:
suffix = None
if self.download_pictures and (not sidecar_node.is_video or self.download_video_thumbnails):
Expand Down Expand Up @@ -945,11 +962,11 @@ def download_highlights(self,
"""
for user_highlight in self.get_highlights(user):
name = user_highlight.owner_username
highlight_target = (filename_target
highlight_target: Union[str, Path] = (filename_target
if filename_target
else (Path(_PostPathFormatter.sanitize_path(name, self.sanitize_paths)) /
_PostPathFormatter.sanitize_path(user_highlight.title,
self.sanitize_paths))) # type: Union[str, Path]
self.sanitize_paths)))
self.context.log("Retrieving highlights \"{}\" from profile {}".format(user_highlight.title, name))
self.download_highlight_cover(user_highlight, highlight_target)
totalcount = user_highlight.itemcount
Expand Down
140 changes: 120 additions & 20 deletions instaloader/instaloadercontext.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import textwrap
import time
import urllib.parse
import uuid
from contextlib import contextmanager
from datetime import datetime, timedelta
from functools import partial
Expand Down Expand Up @@ -36,6 +37,34 @@ def default_user_agent() -> str:
'(KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36'


def default_iphone_headers() -> Dict[str, Any]:
    """Return a fresh dict of default HTTP headers imitating the Instagram iPad app.

    A new dict is built on every call. The connection speed and the pigeon session id
    are generated per call; the timezone offset is derived from the local timezone.

    :return: Mapping from header name to header value (all values are strings).
    """
    # utcoffset() is negative west of UTC. timedelta normalizes a negative value to
    # days=-1 plus a positive ``.seconds`` (UTC-5 -> 68400), so ``.seconds`` would report
    # a wrong offset there; total_seconds() yields the signed value (-18000).
    utc_offset = datetime.now().astimezone().utcoffset() or timedelta(seconds=0)
    return {'User-Agent': 'Instagram 273.0.0.16.70 (iPad13,8; iOS 16_3; en_US; en-US; '
                          'scale=2.00; 2048x2732; 452417278) AppleWebKit/420+',
            'x-ads-opt-out': '1',
            'x-bloks-is-panorama-enabled': 'true',
            'x-bloks-version-id': '01507c21540f73e2216b6f62a11a5b5e51aa85491b72475c080da35b1228ddd6',
            'x-fb-client-ip': 'True',
            'x-fb-connection-type': 'wifi',
            'x-fb-http-engine': 'Liger',
            'x-fb-server-cluster': 'True',
            'x-fb': '1',
            'x-ig-abr-connection-speed-kbps': '2',
            'x-ig-app-id': '124024574287414',
            'x-ig-app-locale': 'en-US',
            'x-ig-app-startup-country': 'US',
            'x-ig-bandwidth-speed-kbps': '0.000',
            'x-ig-capabilities': '36r/F/8=',
            'x-ig-connection-speed': '{}kbps'.format(random.randint(1000, 20000)),
            'x-ig-connection-type': 'WiFi',
            'x-ig-device-locale': 'en-US',
            'x-ig-mapped-locale': 'en-US',
            'x-ig-timezone-offset': str(int(utc_offset.total_seconds())),
            'x-ig-www-claim': '0',
            'x-pigeon-session-id': str(uuid.uuid4()),
            'x-tigon-is-retry': 'False',
            'x-whatsapp': '0'}


class InstaloaderContext:
"""Class providing methods for (error) logging and low-level communication with Instagram.
Expand All @@ -61,16 +90,18 @@ def __init__(self, sleep: bool = True, quiet: bool = False, user_agent: Optional
self.request_timeout = request_timeout
self._session = self.get_anonymous_session()
self.username = None
self.user_id = None
self.sleep = sleep
self.quiet = quiet
self.max_connection_attempts = max_connection_attempts
self._graphql_page_length = 50
self._root_rhx_gis = None
self.two_factor_auth_pending = None
self.iphone_support = iphone_support
self.iphone_headers = default_iphone_headers()

# error log, filled with error() and printed at the end of Instaloader.main()
self.error_log = [] # type: List[str]
self.error_log: List[str] = []

self._rate_controller = rate_controller(self) if rate_controller is not None else RateController(self)

Expand All @@ -81,20 +112,26 @@ def __init__(self, sleep: bool = True, quiet: bool = False, user_agent: Optional
self.fatal_status_codes = fatal_status_codes or []

# Cache profile from id (mapping from id to Profile)
self.profile_id_cache = dict() # type: Dict[int, Any]
self.profile_id_cache: Dict[int, Any] = dict()

@contextmanager
def anonymous_copy(self):
    """Temporarily turn this context into an anonymous one.

    While the ``with`` body runs, the context uses a new anonymous session, has no
    username or user id, and carries default iPhone headers. The context itself is
    yielded. On exit the anonymous session is closed and the previous session,
    username, user id and iPhone headers are put back.
    """
    # Remember the current state before it is replaced.
    saved_state = (self._session, self.username, self.user_id, self.iphone_headers)
    self._session = self.get_anonymous_session()
    self.username = None
    self.user_id = None
    self.iphone_headers = default_iphone_headers()
    try:
        yield self
    finally:
        # Close the temporary anonymous session first, then restore the saved state.
        self._session.close()
        self._session, self.username, self.user_id, self.iphone_headers = saved_state

@property
def is_logged_in(self) -> bool:
Expand Down Expand Up @@ -170,22 +207,30 @@ def get_anonymous_session(self) -> requests.Session:
session.request = partial(session.request, timeout=self.request_timeout) # type: ignore
return session

def save_session(self):
    """Not meant to be used directly, use :meth:`Instaloader.save_session`.

    :return: The cookies of the current session as a plain :class:`dict`.
    """
    return requests.utils.dict_from_cookiejar(self._session.cookies)

def load_session(self, username, sessiondata):
    """Not meant to be used directly, use :meth:`Instaloader.load_session`.

    Builds a new :class:`requests.Session` from the given cookie dict and makes it
    the active session of this context.

    :param username: Username to record for the restored session.
    :param sessiondata: Cookie dict, as produced by :meth:`save_session`.
    :raises KeyError: If ``sessiondata`` contains no ``csrftoken`` cookie.
    """
    session = requests.Session()
    session.cookies = requests.utils.cookiejar_from_dict(sessiondata)
    session.headers.update(self._default_http_header())
    session.headers.update({'X-CSRFToken': session.cookies.get_dict()['csrftoken']})
    # Override default timeout behavior.
    # Need to silence mypy bug for this. See: https://github.com/python/mypy/issues/2427
    session.request = partial(session.request, timeout=self.request_timeout)  # type: ignore
    self._session = session
    self.username = username
    # NOTE(review): unlike login(), this does not set self.user_id - confirm whether
    # callers that read user_id after restoring a session expect it to be populated.

def save_session_to_file(self, sessionfile):
    """Not meant to be used directly, use :meth:`Instaloader.save_session_to_file`.

    :param sessionfile: Writable binary file object the pickled session dict is written to.
    """
    pickle.dump(self.save_session(), sessionfile)

def load_session_from_file(self, username, sessionfile):
    """Not meant to be used directly, use :meth:`Instaloader.load_session_from_file`.

    :param username: Username to record for the restored session.
    :param sessionfile: Readable binary file object containing a pickled session dict.
    """
    # NOTE(security): pickle.load can execute arbitrary code on crafted input; only
    # session files from a trusted source must be passed here.
    self.load_session(username, pickle.load(sessionfile))

def test_login(self) -> Optional[str]:
"""Not meant to be used directly, use :meth:`Instaloader.test_login`."""
data = self.graphql_query("d6f4427fbe92d846298cf93df0b937d3", {})
Expand Down Expand Up @@ -265,6 +310,7 @@ def login(self, user, passwd):
session.headers.update({'X-CSRFToken': login.cookies['csrftoken']})
self._session = session
self.username = user
self.user_id = resp_json['userId']

def two_factor_login(self, two_factor_code):
"""Second step of login if 2FA is enabled.
Expand Down Expand Up @@ -298,7 +344,8 @@ def do_sleep(self):
time.sleep(min(random.expovariate(0.6), 15.0))

def get_json(self, path: str, params: Dict[str, Any], host: str = 'www.instagram.com',
session: Optional[requests.Session] = None, _attempt=1) -> Dict[str, Any]:
session: Optional[requests.Session] = None, _attempt=1,
response_headers: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
"""JSON request to Instagram.
:param path: URL, relative to the given domain which defaults to www.instagram.com/
Expand All @@ -325,8 +372,11 @@ def get_json(self, path: str, params: Dict[str, Any], host: str = 'www.instagram
resp = sess.get('https://{0}/{1}'.format(host, path), params=params, allow_redirects=False)
if resp.status_code in self.fatal_status_codes:
redirect = " redirect to {}".format(resp.headers['location']) if 'location' in resp.headers else ""
raise AbortDownloadException("Query to https://{}/{} responded with \"{} {}\"{}".format(
host, path, resp.status_code, resp.reason, redirect
body = ""
if resp.headers['Content-Type'].startswith('application/json'):
body = ': ' + resp.text[:500] + ('…' if len(resp.text) > 501 else '')
raise AbortDownloadException("Query to https://{}/{} responded with \"{} {}\"{}{}".format(
host, path, resp.status_code, resp.reason, redirect, body
))
while resp.is_redirect:
redirect_url = resp.headers['location']
Expand All @@ -335,13 +385,16 @@ def get_json(self, path: str, params: Dict[str, Any], host: str = 'www.instagram
redirect_url.startswith('https://i.instagram.com/accounts/login')):
if not self.is_logged_in:
raise LoginRequiredException("Redirected to login page. Use --login.")
# alternate rate limit exceeded behavior
raise TooManyRequestsException("Redirected to login")
raise AbortDownloadException("Redirected to login page. You've been logged out, please wait " +
"some time, recreate the session and try again")
if redirect_url.startswith('https://{}/'.format(host)):
resp = sess.get(redirect_url if redirect_url.endswith('/') else redirect_url + '/',
params=params, allow_redirects=False)
else:
break
if response_headers is not None:
response_headers.clear()
response_headers.update(resp.headers)
if resp.status_code == 400:
raise QueryReturnedBadRequestException("400 Bad Request")
if resp.status_code == 404:
Expand Down Expand Up @@ -392,7 +445,8 @@ def get_json(self, path: str, params: Dict[str, Any], host: str = 'www.instagram
self._rate_controller.handle_429('iphone')
if is_other_query:
self._rate_controller.handle_429('other')
return self.get_json(path=path, params=params, host=host, session=sess, _attempt=_attempt + 1)
return self.get_json(path=path, params=params, host=host, session=sess, _attempt=_attempt + 1,
response_headers=response_headers)
except KeyboardInterrupt:
self.error("[skipped by user]", repeat_at_end=False)
raise ConnectionException(error_string) from err
Expand Down Expand Up @@ -482,11 +536,57 @@ def get_iphone_json(self, path: str, params: Dict[str, Any]) -> Dict[str, Any]:
.. versionadded:: 4.2.1"""
with copy_session(self._session, self.request_timeout) as tempsession:
tempsession.headers['User-Agent'] = 'Instagram 146.0.0.27.125 (iPhone12,1; iOS 13_3; en_US; en-US; ' \
'scale=2.00; 1656x3584; 190542906)'
for header in ['Host', 'Origin', 'X-Instagram-AJAX', 'X-Requested-With']:
# Set headers to simulate an API request from iPad
tempsession.headers['ig-intended-user-id'] = str(self.user_id)
tempsession.headers['x-pigeon-rawclienttime'] = '{:.6f}'.format(time.time())

# Add headers obtained from previous iPad request
tempsession.headers.update(self.iphone_headers)

# Extract key information from cookies if we haven't got it already from a previous request
header_cookies_mapping = {'x-mid': 'mid',
'ig-u-ds-user-id': 'ds_user_id',
'x-ig-device-id': 'ig_did',
'x-ig-family-device-id': 'ig_did',
'family_device_id': 'ig_did'}

# Map the cookie value to the matching HTTP request header
cookies = tempsession.cookies.get_dict().copy()
for key, value in header_cookies_mapping.items():
if value in cookies:
if key not in tempsession.headers:
tempsession.headers[key] = cookies[value]
else:
# Remove the cookie value if it's already specified as a header
tempsession.cookies.pop(value, None)

# Edge case for ig-u-rur header due to special string encoding in cookie
if 'rur' in cookies:
if 'ig-u-rur' not in tempsession.headers:
tempsession.headers['ig-u-rur'] = cookies['rur'].strip('\"').encode('utf-8') \
.decode('unicode_escape')
else:
tempsession.cookies.pop('rur', None)

# Remove headers specific to Desktop version
for header in ['Host', 'Origin', 'X-Instagram-AJAX', 'X-Requested-With', 'Referer']:
tempsession.headers.pop(header, None)
return self.get_json(path, params, 'i.instagram.com', tempsession)

# No need for cookies if we have a bearer token
if 'authorization' in tempsession.headers:
tempsession.cookies.clear()

response_headers = dict() # type: Dict[str, Any]
response = self.get_json(path, params, 'i.instagram.com', tempsession, response_headers=response_headers)

# Extract the ig-set-* headers and use them in the next request
for key, value in response_headers.items():
if key.startswith('ig-set-'):
self.iphone_headers[key.replace('ig-set-', '')] = value
elif key.startswith('x-ig-set-'):
self.iphone_headers[key.replace('x-ig-set-', 'x-ig-')] = value

return response

def write_raw(self, resp: Union[bytes, requests.Response], filename: str) -> None:
"""Write raw response data into a file.
Expand Down Expand Up @@ -582,7 +682,7 @@ def sleep(self, secs):

def __init__(self, context: InstaloaderContext):
self._context = context
self._query_timestamps = dict() # type: Dict[str, List[float]]
self._query_timestamps: Dict[str, List[float]] = dict()
self._earliest_next_request_time = 0.0
self._iphone_earliest_next_request_time = 0.0

Expand Down
Loading

0 comments on commit 2885532

Please sign in to comment.