Random sample option #459

Closed
wants to merge 10 commits
2 changes: 1 addition & 1 deletion README.md
@@ -25,7 +25,7 @@ If you prefer you can create a page on the [wiki](https://github.com/docnow/twarc

If you are interested in adding functionality to twarc or fixing something that's broken, here are the steps to set up your development environment:

git clone https://github.com/docnow/twarc
git clone https://github.io/docnow/twarc
cd twarc
pip install -r requirements.txt

2 changes: 1 addition & 1 deletion docs/README.md
@@ -1,7 +1,7 @@
twarc
=====

***For information about working with the Twitter V2 API please see the [twarc2](https://twarc-project.readthedocs.io/en/latest/twarc2/) page.***
***For information about working with the Twitter V2 API please see the [twarc2](twarc2) page.***

---

4 changes: 4 additions & 0 deletions docs/api/client.md
@@ -2,3 +2,7 @@

::: twarc.client
handler: python




1 change: 1 addition & 0 deletions docs/api/client2.md
@@ -2,3 +2,4 @@

::: twarc.client2
handler: python

4 changes: 0 additions & 4 deletions docs/api/expansions.md

This file was deleted.

2 changes: 1 addition & 1 deletion docs/plugins.md
@@ -16,7 +16,7 @@ add it to this list):
* [twarc-ids](https://pypi.org/project/twarc-ids/): extract tweet ids from tweets
* [twarc-videos](https://pypi.org/project/twarc-videos): extract videos from tweets
* [twarc-csv](https://pypi.org/project/twarc-csv/): export tweets to CSV
* [twarc-timeline-archive](https://pypi.org/project/twarc-timeline-archive): routinely download tweet timelines for a list of users
* [twarc-timelines](https://pypi.org/project/twarc-timelines): download tweet timelines for a list of users

## Writing a Plugin

2 changes: 0 additions & 2 deletions docs/twitter-developer-access.md
@@ -61,8 +61,6 @@ Now that you have your keys and tokens, you can start using the API. You may be

Be careful not to commit your keys into a public repository or make them visible to the public - do not include them in a client side js script for example. Most apps will ask for API Key and Secret, but "Consumer Key" is "API Key" and "Consumer Secret" is "API Secret".

For Academic Access, there is only one endpoint that takes Bearer (App Only) authentication, so in most cases, the Bearer Token is all you need to share.

## Step 5: Next Steps

Install `twarc`, and run `twarc2 configure` to set it up.
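For reference, the `twarc2 configure` step described above can also be bypassed by handing credentials straight to the client. A minimal sketch, not part of this diff; the environment variable names follow the style of `test_twarc2.py` later in the diff, and `BEARER_TOKEN` in particular is an assumption:

```python
import os

from twarc import Twarc2

# Bearer (app-only) auth is enough for most read-only v2 endpoints.
T = Twarc2(bearer_token=os.environ["BEARER_TOKEN"])

# User (OAuth 1.0a) auth, when an endpoint requires it. The developer
# portal's "API Key"/"API Secret" are the "Consumer Key"/"Consumer Secret"
# expected here.
# T = Twarc2(
#     consumer_key=os.environ["CONSUMER_KEY"],
#     consumer_secret=os.environ["CONSUMER_SECRET"],
#     access_token=os.environ["ACCESS_TOKEN"],
#     access_token_secret=os.environ["ACCESS_TOKEN_SECRET"],
# )
```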
1 change: 0 additions & 1 deletion mkdocs.yml
@@ -28,7 +28,6 @@ nav:
- Library API:
- api/client.md
- api/client2.md
- api/expansions.md

plugins:
- search
2 changes: 1 addition & 1 deletion setup.py
@@ -17,7 +17,7 @@
url='https://github.com/docnow/twarc',
author='Ed Summers',
author_email='[email protected]',
packages=['twarc'],
packages=['twarc', ],
description='Archive tweets from the command line',
long_description=long_description,
long_description_content_type="text/markdown",
59 changes: 17 additions & 42 deletions test_twarc2.py
@@ -5,7 +5,6 @@
import dotenv
import pytest
import logging
import pathlib
import datetime
import threading

@@ -16,7 +15,6 @@
access_token = os.environ.get('ACCESS_TOKEN')
access_token_secret = os.environ.get('ACCESS_TOKEN_SECRET')

test_data = pathlib.Path('test-data')
logging.basicConfig(filename="test.log", level=logging.INFO)

# Implicitly test the constructor in application auth mode. This ensures that
@@ -294,7 +292,6 @@ def test_follows():
break
assert found >= 1000


def test_follows_username():
"""
Test followers and following by username.
@@ -333,20 +330,16 @@ def test_flattened():
found_referenced_tweets = False

event = threading.Event()
for count, response in enumerate(T.sample(event=event)):

# streaming api always returns a tweet at a time but flatten
# will put these in a list so they can be treated uniformly
tweets = twarc.expansions.flatten(response)
assert len(tweets) == 1
tweet = tweets[0]
for count, result in enumerate(T.sample(event=event)):
result = twarc.expansions.flatten(result)

tweet = result["data"]
assert "id" in tweet
logging.info("got sample tweet #%s %s", count, tweet["id"])

author_id = tweet["author_id"]
assert "author" in tweet
assert tweet["author"]["id"] == author_id
assert result["data"]["author"]["id"] == author_id

if "in_reply_to_user_id" in tweet:
assert "in_reply_to_user" in tweet
@@ -369,11 +362,8 @@
assert tweet["entities"]["mentions"][0]["username"]
found_entities_mentions = True

# need to ensure there are no errors because a referenced tweet
# might be protected or deleted in which case it would not have been
# included in the response and would not have been flattened
if "errors" not in response and "referenced_tweets" in tweet:
assert tweet["referenced_tweets"][0]["text"]
if "referenced_tweets" in tweet:
assert tweet["referenced_tweets"][0]["id"]
found_referenced_tweets = True

if found_geo and found_in_reply_to_user and found_attachments_media \
@@ -393,33 +383,18 @@
assert found_referenced_tweets, "found referenced tweets"


def test_ensure_flattened():
resp = next(T.search_recent('twitter'))

# flatten a response
flat1 = twarc.expansions.ensure_flattened(resp)
assert isinstance(flat1, list)
assert len(flat1) > 1
assert 'author' in flat1[0]

# flatten the flattened list
flat2 = twarc.expansions.ensure_flattened(flat1)
assert isinstance(flat2, list)
assert len(flat2) == len(flat1)
assert 'author' in flat2[0]
def test_flatten_noop():
"""
Flattening twice should be a no-op.
"""
resp = next(T.tweet_lookup(range(1000, 2000)))

# flatten a tweet object which will force it into a list
flat3 = twarc.expansions.ensure_flattened(flat2[0])
assert isinstance(flat3, list)
assert len(flat3) == 1
flat1 = twarc.expansions.flatten(resp)
assert len(flat1) > 0

with pytest.raises(ValueError):
twarc.expansions.ensure_flattened({'fake': 'tweet'})
with pytest.raises(ValueError):
twarc.expansions.ensure_flattened([{'fake': 'tweet'}])
with pytest.raises(ValueError):
flat1[0].pop('author')
twarc.expansions.ensure_flattened(flat1)
flat2 = twarc.expansions.flatten(flat1)
assert len(flat2) > 0
assert len(flat1) == len(flat2)


def test_twarc_metadata():
@@ -433,7 +408,7 @@

for response in T.tweet_lookup(range(1000, 2000)):
assert "__twarc" in response
assert "__twarc" in twarc.expansions.flatten(response)[0]
assert "__twarc" in twarc.expansions.flatten(response)

# Without metadata
T.metadata = False
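The reworked `test_flattened` above exercises the new sample support end to end. A minimal usage sketch based on that test; the placeholder bearer token and the ten-tweet stopping condition are illustrative assumptions, and setting the event to stop the stream mirrors how the test signals that it is done:

```python
import threading

import twarc
from twarc import Twarc2

T = Twarc2(bearer_token="...")  # placeholder credentials

event = threading.Event()
for count, result in enumerate(T.sample(event=event)):
    # flatten() inlines the expansions, so the tweet carries its author,
    # referenced tweets, media, etc. directly.
    tweet = twarc.expansions.flatten(result)["data"]
    print(tweet["id"], tweet["author"]["id"])
    if count >= 10:
        event.set()  # signal the stream to stop
        break
```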
1 change: 0 additions & 1 deletion twarc/__init__.py
@@ -1,4 +1,3 @@
from .client import Twarc
from .client2 import Twarc2
from .version import version
from .expansions import ensure_flattened
5 changes: 0 additions & 5 deletions twarc/__main__.py

This file was deleted.

69 changes: 17 additions & 52 deletions twarc/client2.py
@@ -7,11 +7,10 @@
import re
import ssl
import json
import time
import logging
import datetime
import requests
import datetime
import time

from oauthlib.oauth2 import BackendApplicationClient
from requests.exceptions import ConnectionError
@@ -25,8 +24,6 @@

log = logging.getLogger("twarc")

TWITTER_EPOCH = datetime.datetime(2006, 3, 21, tzinfo=datetime.timezone.utc)


class Twarc2:
"""
@@ -136,19 +133,17 @@ def _search(
count += len(response['data'])
yield response

else:
log.info(f'Retrieved an empty page of results.')

# Calculate the amount of time to sleep, accounting for any
# processing time used by the rest of the application.
# This is to satisfy the 1 request / 1 second rate limit
# on the search/all endpoint.
time.sleep(
max(0, sleep_between - (time.monotonic() - made_call))
)
made_call = time.monotonic()
# Calculate the amount of time to sleep, accounting for any
# processing time used by the rest of the application.
# This is to satisfy the 1 request / 1 second rate limit
# on the search/all endpoint.

log.info(f'No more results for search {query}.')
time.sleep(
max(0, sleep_between - (time.monotonic() - made_call))
)
made_call = time.monotonic()
else:
log.info(f'no more results for search')

def search_recent(
self, query, since_id=None, until_id=None, start_time=None,
@@ -211,13 +206,6 @@ def search_all(
generator[dict]: a generator, dict for each paginated response.
"""
url = "https://api.twitter.com/2/tweets/search/all"

# start time defaults to the beginning of Twitter to override the
# default of the last month. Only do this if start_time is not already
# specified and since_id isn't being used
if start_time is None and since_id is None:
start_time = TWITTER_EPOCH

return self._search(
url, query, since_id, until_id, start_time, end_time, max_results,
sleep_between=1.05
@@ -365,21 +353,6 @@ def sample(self, event=None, record_keepalive=False):
data = _append_metadata(data, resp.url)
yield data

# Check for an operational disconnect error in the response
if data.get("errors", []):
for error in data["errors"]:
if error.get("disconnect_type") == "OperationalDisconnect":
log.info(
"Received operational disconnect message: "
"This stream has fallen too far behind in "
"processing tweets. Some data may have been "
"lost."
)
# Sleep briefly, then break this get call and
# attempt to reconnect.
time.sleep(5)
break

except requests.exceptions.HTTPError as e:
errors += 1
log.error("caught http error %s on %s try", e, errors)
@@ -524,9 +497,7 @@ def _timeline(
count += len(response['data'])
yield response
else:
log.info(f'Retrieved an empty page of results for timeline {user_id}')

log.info(f'No more results for timeline {user_id}.')
log.info(f'no more results for timeline')

def timeline(
self, user, since_id=None, until_id=None, start_time=None,
@@ -735,15 +706,13 @@ def connect(self):
self.client.close()

if self.auth_type == "application" and self.bearer_token:
log.info('creating HTTP session headers for app auth.')
auth = f"Bearer {self.bearer_token}"
log.debug('authorization: %s', auth)
log.info('Creating HTTP session headers for app auth.')
self.client = requests.Session()
self.client.headers.update({"Authorization": auth})
self.client.headers.update(
{"Authorization": f"Bearer {self.bearer_token}"}
)
elif self.auth_type == "application":
log.info('creating app auth client via OAuth2')
log.debug('client_id: %s', self.consumer_key)
log.debug('client_secret: %s', self.consumer_secret)
log.info('Creating app auth client via OAuth2')
client = BackendApplicationClient(client_id=self.consumer_key)
self.client = OAuth2Session(client=client)
self.client.fetch_token(
@@ -753,10 +722,6 @@
)
else:
log.info('creating user auth client')
log.debug('client_id: %s', self.consumer_key)
log.debug('client_secret: %s', self.consumer_secret)
log.debug('resource_owner_key: %s', self.access_token)
log.debug('resource_owner_secret: %s', self.access_token_secret)
self.client = OAuth1Session(
client_key=self.consumer_key,
client_secret=self.consumer_secret,
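The comments in `_search` above explain the pacing between pages: the full-archive `search/all` endpoint is limited to 1 request per second, so the client sleeps only for whatever part of that interval was not already spent processing the previous page. The same pattern in isolation, as a sketch with a hypothetical `fetch_page` callable standing in for the HTTP request:

```python
import time

def paced_pages(fetch_page, sleep_between=1.05):
    """Yield pages from fetch_page while keeping at least sleep_between
    seconds between calls, minus time the consumer already used."""
    made_call = time.monotonic()
    while True:
        page = fetch_page()  # hypothetical: one paginated API request
        if page is None:
            break
        yield page
        # Sleep only for the part of the interval not already consumed by
        # the request and the caller's processing since the previous call.
        time.sleep(max(0, sleep_between - (time.monotonic() - made_call)))
        made_call = time.monotonic()
```

The default of 1.05 seconds matches the `sleep_between=1.05` that `search_all` passes to `_search` in this diff.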