disable instagram in REST API entirely due to rate limiting/blocking

see snarfed/bridgy#665 (comment)
snarfed · Aug 29, 2019 · 0d5f188 · 0d5f188
1 parent 975f37b
commit 0d5f188
Show file tree

Hide file tree

Showing 6 changed files with 65 additions and 11 deletions.
diff --git a/README.md b/README.md
@@ -125,6 +125,8 @@ By default, responses are cached and reused for 10m without re-fetching the sour
 
 To use the REST API in an existing ActivityStreams client, you'll need to hard-code exceptions for the domains you want to use e.g. `facebook.com`, and redirect HTTP requests to the corresponding [endpoint above](#about).
 
+Instagram is disabled in the REST API entirely, sadly, [due to their aggressive rate limiting and blocking](https://github.com/snarfed/bridgy/issues/665#issuecomment-524977427).
+
 The web UI ([granary.io](https://granary.io/)) currently only fetches Facebook access tokens for users. If you want to use it to access a Facebook page, you'll need to get an access token manually with the [Graph API Explorer](https://developers.facebook.com/tools/explorer/) (click on the _Get To..._ drop-down). Then, log into Facebook on [granary.io](https://granary.io/) and paste the page access token into the `access_token` text box.
 
 
@@ -308,6 +310,7 @@ Changelog
   * Bug fix for large block list fetches that get rate limited after a few successful requests.
   * Handle HTTP 403 + error code 200 when fetching retweets for a protected or otherwise unavailable tweet ([bridgy#688](https://github.com/snarfed/bridgy/issues/688#issuecomment-520600329)).
 * Instagram:
+  * Disabled in the REST API entirely due to Instagram's aggressive rate limiting and blocking ([bridgy#665](https://github.com/snarfed/bridgy/issues/665#issuecomment-524977427)).
   * Update scraping to handle replies in new `edge_media_to_parent_comment` field ([#164](https://github.com/snarfed/granary/issues/164)).
 * microformats2:
   * Revise whitespace handling; use `white-space: pre` CSS in HTML output.

diff --git a/api.py b/api.py
@@ -123,7 +123,10 @@ def get(self):
       src = github.GitHub(
         access_token=util.get_required_param(self, 'access_token'))
     elif site == 'instagram':
-      src = instagram.Instagram(scrape=True)
+      if self.request.get('interactive').lower() == 'true':
+        src = instagram.Instagram(scrape=True)
+      else:
+        self.abort(400, 'Sorry, Instagram is not currently available in the REST API. Try https://instagram-atom.appspot.com/ instead!')
     else:
       src_cls = source.sources.get(site)
       if not src_cls:

diff --git a/docs/index.rst b/docs/index.rst
@@ -172,6 +172,10 @@ to hard-code exceptions for the domains you want to use e.g.
 ``facebook.com``, and redirect HTTP requests to the corresponding
 `endpoint above <#about>`__.
 
+Instagram is disabled in the REST API entirely, sadly, `due to their
+aggressive rate limiting and
+blocking <https://github.com/snarfed/bridgy/issues/665#issuecomment-524977427>`__.
+
 The web UI (`granary.io <https://granary.io/>`__) currently only fetches
 Facebook access tokens for users. If you want to use it to access a
 Facebook page, you’ll need to get an access token manually with the
@@ -326,10 +330,10 @@ too <https://github.com/snarfed/oauth-dropins#release-instructions>`__.)
     ``./docs/build.sh``.
 4.  ``git commit -am 'release vX.Y'``
 5.  Upload to `test.pypi.org <https://test.pypi.org/>`__ for testing.
-    ``sh  python3 setup.py clean build sdist  twine upload -r pypitest dist/granary-X.Y.tar.gz``
+    ``sh  python3 setup.py clean build sdist  setenv ver vX.Y  twine upload -r pypitest dist/granary-$ver.tar.gz``
 6.  Install from test.pypi.org, both Python 2 and 3.
-    ``sh  cd /tmp  virtualenv local  source local/bin/activate.csh  pip install -i https://test.pypi.org/simple --extra-index-url https://pypi.org/simple granary  deactivate``
-    ``sh  python3 -m venv local3  source local3/bin/activate.csh  pip3 install --upgrade pip  pip3 install -i https://test.pypi.org/simple --extra-index-url https://pypi.org/simple granary  deactivate``
+    ``sh  cd /tmp  virtualenv local  source local/bin/activate.csh  pip install -i https://test.pypi.org/simple --extra-index-url https://pypi.org/simple granary==$ver  deactivate``
+    ``sh  python3 -m venv local3  source local3/bin/activate.csh  pip3 install --upgrade pip  pip3 install -i https://test.pypi.org/simple --extra-index-url https://pypi.org/simple granary==$ver  deactivate``
 7.  Smoke test that the code trivially loads and runs, in both Python 2
     and 3.
 
@@ -363,7 +367,7 @@ too <https://github.com/snarfed/oauth-dropins#release-instructions>`__.)
     the release “title” in github), put ``### Notable changes`` on the
     second line, then copy and paste this version’s changelog contents
     below it.
-    ``sh  git tag -a vX.Y --cleanup=verbatim  git push  git push --tags``
+    ``sh  git tag -a $ver --cleanup=verbatim  git push  git push --tags``
 9.  `Click here to draft a new release on
     GitHub. <https://github.com/snarfed/granary/releases/new>`__ Enter
     ``vX.Y`` in the *Tag version* box. Leave *Release title* empty. Copy
@@ -431,6 +435,42 @@ Facebook and Twitter’s raw HTML.
 Changelog
 ---------
 
+2.1 - unreleased
+~~~~~~~~~~~~~~~~
+
+-  Convert AS2 ``Mention`` tags to AS1 ``objectType`` ``mention``
+   (non-standard) and vice versa
+   (`snarfed/bridgy-fed#46 <https://github.com/snarfed/bridgy-fed/issues/46>`__).
+-  Twitter:
+
+   -  Bug fix for large block list fetches that get rate limited after a
+      few successful requests.
+   -  Handle HTTP 403 + error code 200 when fetching retweets for a
+      protected or otherwise unavailable tweet
+      (`bridgy#688 <https://github.com/snarfed/bridgy/issues/688#issuecomment-520600329>`__).
+
+-  Instagram:
+
+   -  Disabled in the REST API entirely due to Instagram’s aggressive
+      rate limiting and blocking
+      (`bridgy#665 <https://github.com/snarfed/bridgy/issues/665#issuecomment-524977427>`__).
+   -  Update scraping to handle replies in new
+      ``edge_media_to_parent_comment`` field
+      (`#164 <https://github.com/snarfed/granary/issues/164>`__).
+
+-  microformats2:
+
+   -  Revise whitespace handling; use ``white-space: pre`` CSS in HTML
+      output.
+
+-  Facebook:
+
+   -  Bug fix: don’t interpret ``photo.php`` as username in post URLs.
+
+-  RSS:
+
+   -  Default title to ellipsized content.
+
 2.0 - 2019-03-01
 ~~~~~~~~~~~~~~~~
 

diff --git a/granary/templates/index.html b/granary/templates/index.html
@@ -269,6 +269,9 @@ <h1>
 {% endif %}
 </code>
 
+<input type="hidden" name="cache" value="false" />
+<input type="hidden" name="interactive" value="true" />
+
 <br>
 <button type="submit" class="btn btn-default"
         onclick="render_demo_request()">GET</button>
@@ -312,6 +315,7 @@ <h1>
 
 <input type="hidden" name="plaintext" value="true" />
 <input type="hidden" name="cache" value="false" />
+<input type="hidden" name="interactive" value="true" />
 <button type="submit" class="btn btn-default"
         onclick="render_url_request()">GET</button>
 </code>

diff --git a/static/demo.js b/static/demo.js
@@ -27,16 +27,20 @@ function render_demo_request() {
     url += '&cookie=' + cookie;
   }
 
-  if (site != 'instagram') {
+  var request = document.getElementById('request');
+  if (site == 'instagram') {
+    request.innerHTML = 'Instagram is available here in the web UI, <a href="https://granary.readthedocs.io/">and in the library</a>, <em>but not elsewhere (like feed readers)</em>. <a href="https://instagram-atom.appspot.com/">Try instagram-atom instead!</a>'
+    request.style.fontFamily = 'sans-serif';
+    request.style.fontSize = 'large';
+    request.style.color = 'lightcoral';
+  } else {
     for (i in OAUTH_INPUT_IDS) {
       elem = document.getElementById(OAUTH_INPUT_IDS[i]);
       if (elem && elem.value)
         url += '&' + elem.name + '=' + elem.value;
     }
+    request.innerHTML = 'GET <a href="' + url + '">' + url + '</a>';
   }
-
-  document.getElementById('request').innerHTML =
-    'GET <a href="' + url + '">' + url + '</a>';
 }
 
 function render_url_request() {

diff --git a/test_api.py b/test_api.py
@@ -240,15 +240,15 @@ def test_instagram_scrape_with_cookie(self):
       allow_redirects=False, headers={'Cookie': 'sessionid=c00k1e'})
     self.mox.ReplayAll()
     resp = api.application.get_response(
-      '/instagram/@me/@friends/@app/?cookie=c00k1e')
+      '/instagram/@me/@friends/@app/?cookie=c00k1e&interactive=true')
     self.assertEquals(200, resp.status_int, resp.body)
     self.assertEquals('application/json', resp.headers['Content-Type'])
     self.assert_equals(test_instagram.HTML_ACTIVITIES_FULL,
                        json.loads(resp.body)['items'])
 
   def test_instagram_scrape_without_cookie_error(self):
     resp = api.application.get_response(
-      '/instagram/@me/@friends/@app/?format=html&access_token=...')
+      '/instagram/@me/@friends/@app/?format=html&access_token=...&interactive=true')
     self.assert_equals(400, resp.status_int)
     self.assertIn('Scraping only supports activity_id', resp.body)