From 477676c7df1eb9525c1a73c8044c1110caef4350 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Thu, 6 May 2021 22:02:06 +0100 Subject: [PATCH 1/8] move time related functions to helpers --- twarc/client2.py | 28 ---------------------------- twarc/helpers.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 28 deletions(-) create mode 100644 twarc/helpers.py diff --git a/twarc/client2.py b/twarc/client2.py index 2ee2f29f..d2e43e37 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -740,34 +740,6 @@ def _ensure_user_id(self, user): else: raise ValueError(f"No such user {user}") -def _ts(dt): - """ - Return ISO 8601 / RFC 3339 datetime in UTC. If no timezone is specified it - is assumed to be in UTC. The Twitter API does not accept microseconds. - - Args: - dt (datetime): a `datetime` object to format. - - Returns: - str: an ISO 8601 / RFC 3339 datetime in UTC. - """ - if dt.tzinfo: - dt = dt.astimezone(datetime.timezone.utc) - else: - dt = dt.replace(tzinfo=datetime.timezone.utc) - return dt.isoformat(timespec='seconds') - -def _utcnow(): - """ - Return _now_ in ISO 8601 / RFC 3339 datetime in UTC. - - Returns: - datetime: Current timestamp in UTC. - """ - return datetime.datetime.now(datetime.timezone.utc).isoformat( - timespec='seconds' - ) - def _append_metadata(result, url): """ Appends `__twarc` metadata to the result. diff --git a/twarc/helpers.py b/twarc/helpers.py new file mode 100644 index 00000000..cfb259d0 --- /dev/null +++ b/twarc/helpers.py @@ -0,0 +1,31 @@ +""" +Useful functions for converting things into different types +""" + +def _ts(dt): + """ + Return ISO 8601 / RFC 3339 datetime in UTC. If no timezone is specified it + is assumed to be in UTC. The Twitter API does not accept microseconds. + + Args: + dt (datetime): a `datetime` object to format. + + Returns: + str: an ISO 8601 / RFC 3339 datetime in UTC. + """ + if dt.tzinfo: + dt = dt.astimezone(datetime.timezone.utc) + else: + dt = dt.replace(tzinfo=datetime.timezone.utc) + return dt.isoformat(timespec='seconds') + +def _utcnow(): + """ + Return _now_ in ISO 8601 / RFC 3339 datetime in UTC. + + Returns: + datetime: Current timestamp in UTC. 
+ """ + return datetime.datetime.now(datetime.timezone.utc).isoformat( + timespec='seconds' + ) From 05c6f27f5e28a629fc08cdf2c5d2a457f8f81e5d Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Thu, 6 May 2021 22:03:08 +0100 Subject: [PATCH 2/8] move time related functions to helpers --- twarc/client2.py | 1 + 1 file changed, 1 insertion(+) diff --git a/twarc/client2.py b/twarc/client2.py index d2e43e37..ce36501c 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -19,6 +19,7 @@ from twarc import expansions from twarc.decorators import * +from twarc.helpers import * from twarc.version import version From 14c733681c90d02664a5cbd5adae97261db3f863 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Tue, 11 May 2021 00:06:22 +0100 Subject: [PATCH 3/8] add and move ts functions to helpers --- twarc/client2.py | 11 +++++------ twarc/helpers.py | 39 +++++++++++++++++++++++++++++++++------ twarc/version.py | 2 +- 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/twarc/client2.py b/twarc/client2.py index ce36501c..b7457a6b 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -9,7 +9,6 @@ import json import logging import requests -import datetime import time from oauthlib.oauth2 import BackendApplicationClient @@ -121,9 +120,9 @@ def _search( if until_id: params["until_id"] = until_id if start_time: - params["start_time"] = _ts(start_time) + params["start_time"] = ts(start_time) if end_time: - params["end_time"] = _ts(end_time) + params["end_time"] = ts(end_time) count = 0 made_call = time.monotonic() @@ -487,9 +486,9 @@ def _timeline( if until_id: params["until_id"] = until_id if start_time: - params["start_time"] = _ts(start_time) + params["start_time"] = ts(start_time) if end_time: - params["end_time"] = _ts(end_time) + params["end_time"] = ts(end_time) count = 0 for response in self.get_paginated(url, params=params): @@ -757,6 +756,6 @@ def _append_metadata(result, url): result["__twarc"] = { "url": url, "version": version, - "retrieved_at": _utcnow() + "retrieved_at": utcnow() } return result diff --git a/twarc/helpers.py b/twarc/helpers.py index cfb259d0..11f32098 100644 --- a/twarc/helpers.py +++ b/twarc/helpers.py @@ -1,8 +1,10 @@ """ Useful functions for converting things into different types """ +import datetime -def _ts(dt): + +def ts(dt): """ Return ISO 8601 / RFC 3339 datetime in UTC. If no timezone is specified it is assumed to be in UTC. The Twitter API does not accept microseconds. @@ -17,15 +19,40 @@ def _ts(dt): dt = dt.astimezone(datetime.timezone.utc) else: dt = dt.replace(tzinfo=datetime.timezone.utc) - return dt.isoformat(timespec='seconds') + return dt.isoformat(timespec="seconds") + -def _utcnow(): +def utcnow(): """ Return _now_ in ISO 8601 / RFC 3339 datetime in UTC. Returns: datetime: Current timestamp in UTC. """ - return datetime.datetime.now(datetime.timezone.utc).isoformat( - timespec='seconds' - ) + return datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds") + + +def _snowflake2millis(snowflake_id): + return (snowflake_id >> 22) + 1288834974657 + + +def _millis2snowflake(milliseconds): + return (int(milliseconds) - 1288834974657) << 22 + + +def _get_millis(ms): + return ms % 1000 + + +def _sample_windows(start_ts, end_ts, sample_type): + """ + todo: Generate tuples of start and end snowflake ids between two timestamps + + sample_type - type of random sample and millisecond range: + _1% "Spritzer" Sample [657-666] + 10% "Gardenhose" Sample [657-756] + 10% "Enterprise" Sample [*0*] + _1% v2 Sample [?] + _N% v2 Sample [?] 
+ """ + pass diff --git a/twarc/version.py b/twarc/version.py index 9febec5a..e0c5a628 100644 --- a/twarc/version.py +++ b/twarc/version.py @@ -1 +1 @@ -version = '2.0.12' +version = '2.0.13' From f15692f051c242c856c830b03b94eba0ffb33adf Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Wed, 27 Oct 2021 11:47:07 +0100 Subject: [PATCH 4/8] Revert "merge main" This reverts commit c5b777aa6f74a80d6e7b0f6923ecfc9d1820f07f, reversing changes made to 730e98dab72172a4f148358393905b62d5e9d6cd. --- docs/README.md | 2 +- docs/api/client.md | 4 + docs/api/client2.md | 1 + docs/api/expansions.md | 4 - docs/plugins.md | 2 +- docs/twitter-developer-access.md | 2 - mkdocs.yml | 1 - setup.py | 2 +- test_twarc2.py | 59 ++----- twarc/__init__.py | 1 - twarc/__main__.py | 4 +- twarc/client2.py | 69 ++------ twarc/command2.py | 289 ++++++++----------------------- twarc/config.py | 16 -- twarc/expansions.py | 69 ++------ twarc/handshake.py | 7 +- twarc/version.py | 2 +- twarc2.py | 4 + utils/source.py | 7 +- utils/wall.py | 30 ++-- 20 files changed, 158 insertions(+), 417 deletions(-) delete mode 100644 docs/api/expansions.md delete mode 100644 twarc/config.py create mode 100644 twarc2.py diff --git a/docs/README.md b/docs/README.md index 4961bc65..6cec7483 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,7 +1,7 @@ twarc ===== -***For information about working with the Twitter V2 API please see the [twarc2](https://twarc-project.readthedocs.io/en/latest/twarc2/) page.*** +***For information about working with the Twitter V2 API please see the [twarc2](twarc2) page.*** --- diff --git a/docs/api/client.md b/docs/api/client.md index dacadbfb..4ccaac08 100644 --- a/docs/api/client.md +++ b/docs/api/client.md @@ -2,3 +2,7 @@ ::: twarc.client handler: python + + + + diff --git a/docs/api/client2.md b/docs/api/client2.md index f9dcbb2d..21506c7a 100644 --- a/docs/api/client2.md +++ b/docs/api/client2.md @@ -2,3 +2,4 @@ ::: twarc.client2 handler: python + diff --git a/docs/api/expansions.md b/docs/api/expansions.md deleted file mode 100644 index 1e6c763d..00000000 --- a/docs/api/expansions.md +++ /dev/null @@ -1,4 +0,0 @@ -# twarc.expansions - -::: twarc.expansions - handler: python diff --git a/docs/plugins.md b/docs/plugins.md index e7905d6f..1d037504 100644 --- a/docs/plugins.md +++ b/docs/plugins.md @@ -16,7 +16,7 @@ add it to this list): * [twarc-ids](https://pypi.org/project/twarc-ids/): extract tweet ids from tweets * [twarc-videos](https://pypi.org/project/twarc-videos): extract videos from tweets * [twarc-csv](https://pypi.org/project/twarc-csv/): export tweets to CSV -* [twarc-timeline-archive](https://pypi.org/project/twarc-timeline-archive): routinely download tweet timelines for a list of users +* [twarc-timelines](https://pypi.org/project/twarc-timelines): download tweet timelines for a list of users ## Writing a Plugin diff --git a/docs/twitter-developer-access.md b/docs/twitter-developer-access.md index 3f0e53fd..64a440dd 100644 --- a/docs/twitter-developer-access.md +++ b/docs/twitter-developer-access.md @@ -61,8 +61,6 @@ Now that you have your keys and tokens, you can start using the API. You may be Be careful not to commit your keys into a public repository or make them visible to the public - do not include them in a client side js script for example. Most apps will ask for API Key and Secret, but "Consumer Key" is "API Key" and "Consumer Secret" is "API Secret". 
-For Academic Access, there is only one endpoint that takes Bearer (App Only) authentication, so in most cases, the Bearer Token is all you need to share. - ## Step 5: Next Steps Install `twarc`, and run `twarc2 configure` to set it up. diff --git a/mkdocs.yml b/mkdocs.yml index 08662270..b5d0de77 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -28,7 +28,6 @@ nav: - Library API: - api/client.md - api/client2.md - - api/expansions.md plugins: - search diff --git a/setup.py b/setup.py index 64e0fb32..bc95d3f0 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ url='https://github.com/docnow/twarc', author='Ed Summers', author_email='ehs@pobox.com', - packages=['twarc'], + packages=['twarc', ], description='Archive tweets from the command line', long_description=long_description, long_description_content_type="text/markdown", diff --git a/test_twarc2.py b/test_twarc2.py index ca639cc4..9015b2a3 100644 --- a/test_twarc2.py +++ b/test_twarc2.py @@ -5,7 +5,6 @@ import dotenv import pytest import logging -import pathlib import datetime import threading @@ -16,7 +15,6 @@ access_token = os.environ.get('ACCESS_TOKEN') access_token_secret = os.environ.get('ACCESS_TOKEN_SECRET') -test_data = pathlib.Path('test-data') logging.basicConfig(filename="test.log", level=logging.INFO) # Implicitly test the constructor in application auth mode. This ensures that @@ -294,7 +292,6 @@ def test_follows(): break assert found >= 1000 - def test_follows_username(): """ Test followers and and following by username. @@ -333,20 +330,16 @@ def test_flattened(): found_referenced_tweets = False event = threading.Event() - for count, response in enumerate(T.sample(event=event)): - - # streaming api always returns a tweet at a time but flatten - # will put these in a list so they can be treated uniformly - tweets = twarc.expansions.flatten(response) - assert len(tweets) == 1 - tweet = tweets[0] + for count, result in enumerate(T.sample(event=event)): + result = twarc.expansions.flatten(result) + tweet = result["data"] assert "id" in tweet logging.info("got sample tweet #%s %s", count, tweet["id"]) author_id = tweet["author_id"] assert "author" in tweet - assert tweet["author"]["id"] == author_id + assert result["data"]["author"]["id"] == author_id if "in_reply_to_user_id" in tweet: assert "in_reply_to_user" in tweet @@ -369,11 +362,8 @@ def test_flattened(): assert tweet["entities"]["mentions"][0]["username"] found_entities_mentions = True - # need to ensure there are no errors because a referenced tweet - # might be protected or deleted in which case it would not have been - # included in the response and would not have been flattened - if "errors" not in response and "referenced_tweets" in tweet: - assert tweet["referenced_tweets"][0]["text"] + if "referenced_tweets" in tweet: + assert tweet["referenced_tweets"][0]["id"] found_referenced_tweets = True if found_geo and found_in_reply_to_user and found_attachments_media \ @@ -393,33 +383,18 @@ def test_flattened(): assert found_referenced_tweets, "found referenced tweets" -def test_ensure_flattened(): - resp = next(T.search_recent('twitter')) - - # flatten a response - flat1 = twarc.expansions.ensure_flattened(resp) - assert isinstance(flat1, list) - assert len(flat1) > 1 - assert 'author' in flat1[0] - - # flatten the flattened list - flat2 = twarc.expansions.ensure_flattened(flat1) - assert isinstance(flat2, list) - assert len(flat2) == len(flat1) - assert 'author' in flat2[0] +def test_flatten_noop(): + """ + Flattening twice should be a no-op. 
+ """ + resp = next(T.tweet_lookup(range(1000, 2000))) - # flatten a tweet object which will force it into a list - flat3 = twarc.expansions.ensure_flattened(flat2[0]) - assert isinstance(flat3, list) - assert len(flat3) == 1 + flat1 = twarc.expansions.flatten(resp) + assert len(flat1) > 0 - with pytest.raises(ValueError): - twarc.expansions.ensure_flattened({'fake': 'tweet'}) - with pytest.raises(ValueError): - twarc.expansions.ensure_flattened([{'fake': 'tweet'}]) - with pytest.raises(ValueError): - flat1[0].pop('author') - twarc.expansions.ensure_flattened(flat1) + flat2 = twarc.expansions.flatten(flat1) + assert len(flat2) > 0 + assert len(flat1) == len(flat2) def test_twarc_metadata(): @@ -433,7 +408,7 @@ def test_twarc_metadata(): for response in T.tweet_lookup(range(1000, 2000)): assert "__twarc" in response - assert "__twarc" in twarc.expansions.flatten(response)[0] + assert "__twarc" in twarc.expansions.flatten(response) # Witout metadata T.metadata = False diff --git a/twarc/__init__.py b/twarc/__init__.py index e3773bc6..38e546e8 100644 --- a/twarc/__init__.py +++ b/twarc/__init__.py @@ -1,4 +1,3 @@ from .client import Twarc from .client2 import Twarc2 from .version import version -from .expansions import ensure_flattened diff --git a/twarc/__main__.py b/twarc/__main__.py index 85497b8c..53f093ef 100644 --- a/twarc/__main__.py +++ b/twarc/__main__.py @@ -1,5 +1,5 @@ -from twarc.command2 import twarc2 +import twarc.command if __name__ == "__main__": - twarc2(prog_name="python -m twarc2") + twarc.command.main() diff --git a/twarc/client2.py b/twarc/client2.py index 4b017ecb..b7457a6b 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -7,10 +7,9 @@ import re import ssl import json -import time import logging -import datetime import requests +import time from oauthlib.oauth2 import BackendApplicationClient from requests.exceptions import ConnectionError @@ -25,8 +24,6 @@ log = logging.getLogger("twarc") -TWITTER_EPOCH = datetime.datetime(2006, 3, 21, tzinfo=datetime.timezone.utc) - class Twarc2: """ @@ -136,19 +133,17 @@ def _search( count += len(response['data']) yield response - else: - log.info(f'Retrieved an empty page of results.') - - # Calculate the amount of time to sleep, accounting for any - # processing time used by the rest of the application. - # This is to satisfy the 1 request / 1 second rate limit - # on the search/all endpoint. - time.sleep( - max(0, sleep_between - (time.monotonic() - made_call)) - ) - made_call = time.monotonic() + # Calculate the amount of time to sleep, accounting for any + # processing time used by the rest of the application. + # This is to satisfy the 1 request / 1 second rate limit + # on the search/all endpoint. - log.info(f'No more results for search {query}.') + time.sleep( + max(0, sleep_between - (time.monotonic() - made_call)) + ) + made_call = time.monotonic() + else: + log.info(f'no more results for search') def search_recent( self, query, since_id=None, until_id=None, start_time=None, @@ -211,13 +206,6 @@ def search_all( generator[dict]: a generator, dict for each paginated response. """ url = "https://api.twitter.com/2/tweets/search/all" - - # start time defaults to the beginning of Twitter to override the - # default of the last month. 
Only do this if start_time is not already - # specified and since_id isn't being used - if start_time is None and since_id is None: - start_time = TWITTER_EPOCH - return self._search( url, query, since_id, until_id, start_time, end_time, max_results, sleep_between=1.05 @@ -365,21 +353,6 @@ def sample(self, event=None, record_keepalive=False): data = _append_metadata(data, resp.url) yield data - # Check for an operational disconnect error in the response - if data.get("errors", []): - for error in data["errors"]: - if error.get("disconnect_type") == "OperationalDisconnect": - log.info( - "Received operational disconnect message: " - "This stream has fallen too far behind in " - "processing tweets. Some data may have been " - "lost." - ) - # Sleep briefly, then break this get call and - # attempt to reconnect. - time.sleep(5) - break - except requests.exceptions.HTTPError as e: errors += 1 log.error("caught http error %s on %s try", e, errors) @@ -524,9 +497,7 @@ def _timeline( count += len(response['data']) yield response else: - log.info(f'Retrieved an empty page of results for timeline {user_id}') - - log.info(f'No more results for timeline {user_id}.') + log.info(f'no more results for timeline') def timeline( self, user, since_id=None, until_id=None, start_time=None, @@ -735,15 +706,13 @@ def connect(self): self.client.close() if self.auth_type == "application" and self.bearer_token: - log.info('creating HTTP session headers for app auth.') - auth = f"Bearer {self.bearer_token}" - log.debug('authorization: %s', auth) + log.info('Creating HTTP session headers for app auth.') self.client = requests.Session() - self.client.headers.update({"Authorization": auth}) + self.client.headers.update( + {"Authorization": f"Bearer {self.bearer_token}"} + ) elif self.auth_type == "application": - log.info('creating app auth client via OAuth2') - log.debug('client_id: %s', self.consumer_key) - log.debug('client_secret: %s', self.consumer_secret) + log.info('Creating app auth client via OAuth2') client = BackendApplicationClient(client_id=self.consumer_key) self.client = OAuth2Session(client=client) self.client.fetch_token( @@ -753,10 +722,6 @@ def connect(self): ) else: log.info('creating user auth client') - log.debug('client_id: %s', self.consumer_key) - log.debug('client_secret: %s', self.consumer_secret) - log.debug('resource_owner_key: %s', self.access_token) - log.debug('resource_owner_secret: %s', self.access_token_secret) self.client = OAuth1Session( client_key=self.consumer_key, client_secret=self.consumer_secret, diff --git a/twarc/command2.py b/twarc/command2.py index fdb58c50..b1de6f92 100644 --- a/twarc/command2.py +++ b/twarc/command2.py @@ -19,12 +19,10 @@ from twarc.version import version from twarc.handshake import handshake -from twarc.config import ConfigProvider from twarc.decorators import cli_api_error -from twarc.expansions import ensure_flattened +from twarc.expansions import flatten as flat from click_config_file import configuration_option -config_provider = ConfigProvider() @with_plugins(iter_entry_points('twarc.plugins')) @click.group() @@ -44,26 +42,23 @@ show_default=True, ) @click.option('--log', default='twarc.log') -@click.option('--verbose', is_flag=True, default=False) @click.option('--metadata/--no-metadata', default=True, show_default=True, help="Include/don't include metadata about when and how data was collected.") -@configuration_option(cmd_name='twarc', config_file_name='config', provider=config_provider) +@configuration_option(cmd_name='twarc') 
@click.pass_context def twarc2( ctx, consumer_key, consumer_secret, access_token, access_token_secret, bearer_token, - log, metadata, app_auth, verbose + log, metadata, app_auth ): """ Collect data from the Twitter V2 API. """ logging.basicConfig( filename=log, - level=logging.DEBUG if verbose else logging.INFO, + level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s" ) - logging.info("using config %s", config_provider.file_path) - if bearer_token or (consumer_key and consumer_secret): if app_auth and (bearer_token or (consumer_key and consumer_secret)): ctx.obj = twarc.Twarc2( @@ -108,19 +103,15 @@ def configure(ctx): """ Set up your Twitter app keys. """ - - config_file = config_provider.file_path - logging.info('creating config file: %s', config_file) - - config_dir = pathlib.Path(config_file).parent - if not config_dir.is_dir(): - logging.info('creating config directory: %s', config_dir) - config_dir.mkdir(parents=True) - keys = handshake() if keys is None: raise click.ClickException("Unable to authenticate") + config_dir = pathlib.Path(click.get_app_dir('twarc')) + if not config_dir.is_dir(): + config_dir.mkdir(parents=True) + config_file = config_dir / 'config' + config = configobj.ConfigObj(unrepr=True) config.filename = config_file @@ -167,12 +158,14 @@ def get_version(): help='Search the full archive (requires Academic Research track)') @click.option('--limit', default=0, help='Maximum number of tweets to save') @click.option('--max-results', default=0, help='Maximum number of tweets per API response') +@click.option('--flatten', is_flag=True, default=False, + help='Include expansions inline with tweets, and one line per tweet') @click.argument('query', type=str) @click.argument('outfile', type=click.File('w'), default='-') @click.pass_obj @cli_api_error def search(T, query, outfile, since_id, until_id, start_time, end_time, limit, - max_results, archive): + max_results, archive, flatten): """ Search for tweets. """ @@ -184,6 +177,12 @@ def search(T, query, outfile, since_id, until_id, start_time, end_time, limit, # default number of tweets per response 500 when not set otherwise if max_results == 0: max_results = 500 + + # if the user is searching the historical archive the assumption is that + # they want to search everything, and not just the previous month which + # is the default: https://github.com/DocNow/twarc/issues/434 + if start_time == None and since_id == None: + start_time = datetime.datetime(2006, 3, 21, tzinfo=datetime.timezone.utc) else: if max_results == 0: max_results = 100 @@ -191,19 +190,21 @@ def search(T, query, outfile, since_id, until_id, start_time, end_time, limit, for result in search_method(query, since_id, until_id, start_time, end_time, max_results): - _write(result, outfile) + _write(result, outfile, flatten) count += len(result['data']) if limit != 0 and count >= limit: break @twarc2.command('tweet') +@click.option('--flatten', is_flag=True, default=False, + help='Include expansions inline with tweets, and one line per tweet') @click.option('--pretty', is_flag=True, default=False, help='Pretty print the JSON') @click.argument('tweet_id', type=str) @click.argument('outfile', type=click.File('w'), default='-') @click.pass_obj @cli_api_error -def tweet(T, tweet_id, outfile, pretty): +def tweet(T, tweet_id, outfile, flatten, pretty): """ Look up a tweet using its tweet id or URL. 
""" @@ -212,23 +213,25 @@ def tweet(T, tweet_id, outfile, pretty): if not re.match('^\d+$', tweet_id): click.echo(click.style("Please enter a tweet URL or ID", fg="red"), err=True) result = next(T.tweet_lookup([tweet_id])) - _write(result, outfile, pretty=pretty) + _write(result, outfile, flatten, pretty=pretty) @twarc2.command('followers') @click.option('--limit', default=0, help='Maximum number of followers to save') +@click.option('--flatten', is_flag=True, default=False, + help='Include expansions inline with users, and one line per user') @click.argument('user', type=str) @click.argument('outfile', type=click.File('w'), default='-') @click.pass_obj @cli_api_error -def followers(T, user, outfile, limit): +def followers(T, user, outfile, limit, flatten): """ Get the followers for a given user. """ count = 0 for result in T.followers(user): - _write(result, outfile) + _write(result, outfile, flatten) count += len(result['data']) if limit != 0 and count >= limit: break @@ -236,18 +239,20 @@ def followers(T, user, outfile, limit): @twarc2.command('following') @click.option('--limit', default=0, help='Maximum number of friends to save') +@click.option('--flatten', is_flag=True, default=False, + help='Include expansions inline with users, and one line per user') @click.argument('userd', type=str) @click.argument('outfile', type=click.File('w'), default='-') @click.pass_obj @cli_api_error -def following(T, user, outfile, limit): +def following(T, user, outfile, limit, flatten): """ Get the users who are following a given user. """ count = 0 for result in T.following(user): - _write(result, outfile) + _write(result, outfile, flatten) count += len(result['data']) if limit != 0 and count >= limit: break @@ -255,10 +260,12 @@ def following(T, user, outfile, limit): @twarc2.command('sample') @click.option('--limit', default=0, help='Maximum number of tweets to save') +@click.option('--flatten', is_flag=True, default=False, + help='Include expansions inline with tweets, and one line per tweet.') @click.argument('outfile', type=click.File('a+'), default='-') @click.pass_obj @cli_api_error -def sample(T, outfile, limit): +def sample(T, flatten, outfile, limit): """ Fetch tweets from the sample stream. """ @@ -269,35 +276,38 @@ def sample(T, outfile, limit): count += 1 if limit != 0 and count >= limit: event.set() - _write(result, outfile) + _write(result, outfile, flatten) @twarc2.command('hydrate') @click.argument('infile', type=click.File('r'), default='-') @click.argument('outfile', type=click.File('w'), default='-') +@click.option('--flatten', is_flag=True, default=False, + help='Include expansions inline with tweets, and one line per tweet.') @click.pass_obj @cli_api_error -def hydrate(T, infile, outfile): +def hydrate(T, infile, outfile, flatten): """ Hydrate tweet ids. """ for result in T.tweet_lookup(infile): - _write(result, outfile) + _write(result, outfile, flatten) @twarc2.command('users') @click.option('--usernames', is_flag=True, default=False) +@click.option('--flatten', is_flag=True, default=False, + help='Include expansions inline with tweets, and one line per tweet.') @click.argument('infile', type=click.File('r'), default='-') @click.argument('outfile', type=click.File('w'), default='-') @click.pass_obj @cli_api_error -def users(T, infile, outfile, usernames): +def users(T, infile, outfile, usernames, flatten): """ Get data for user ids or usernames. 
""" for result in T.user_lookup(infile, usernames): - _write(result, outfile) - + _write(result, outfile, flatten) @twarc2.command('mentions') @click.option('--since-id', type=int, @@ -310,20 +320,20 @@ def users(T, infile, outfile, usernames): @click.option('--end-time', type=click.DateTime(formats=('%Y-%m-%d', '%Y-%m-%dT%H:%M:%S')), help='Match tweets sent before time (ISO 8601/RFC 3339)') +@click.option('--flatten', is_flag=True, default=False, + help='Include expansions inline with tweets, and one line per tweet') @click.argument('user_id', type=str) @click.argument('outfile', type=click.File('w'), default='-') @click.pass_obj @cli_api_error -def mentions(T, user_id, outfile, since_id, until_id, start_time, end_time): +def mentions(T, user_id, outfile, since_id, until_id, start_time, end_time, flatten): """ Retrieve the most recent tweets mentioning the given user. """ for result in T.mentions(user_id, since_id, until_id, start_time, end_time): - _write(result, outfile) - + _write(result, outfile, flatten) @twarc2.command('timeline') -@click.option('--limit', default=0, help='Maximum number of tweets to return') @click.option('--since-id', type=int, help='Match tweets sent after tweet id') @click.option('--until-id', type=int, @@ -334,183 +344,18 @@ def mentions(T, user_id, outfile, since_id, until_id, start_time, end_time): @click.option('--end-time', type=click.DateTime(formats=('%Y-%m-%d', '%Y-%m-%dT%H:%M:%S')), help='Match tweets sent before time (ISO 8601/RFC 3339)') -@click.option('--use-search', is_flag=True, default=False, - help='Use the search/all API endpoint which is not limited to the last 3200 tweets, but requires Academic Product Track access.') +@click.option('--flatten', is_flag=True, default=False, + help='Include expansions inline with tweets, and one line per tweet') @click.argument('user_id', type=str) @click.argument('outfile', type=click.File('w'), default='-') @click.pass_obj @cli_api_error -def timeline(T, user_id, outfile, since_id, until_id, start_time, end_time, - use_search, limit): - """ - Retrieve recent tweets for the given user. - """ - - if use_search: - q = f'from:{user_id}' - tweets = T.search_all(q, since_id, until_id, start_time, end_time) - else: - tweets = T.timeline(user_id, since_id, until_id, start_time, end_time) - - count = 0 - for result in tweets: - _write(result, outfile) - - count += len(result['data']) - if limit != 0 and count >= limit: - break - - -@twarc2.command('timelines') -@click.option('--limit', default=0, help='Maximum number of tweets to return') -@click.option('--timeline-limit', default=0, - help='Maximum number of tweets to return per-timeline') -@click.option('--use-search', is_flag=True, default=False, - help='Use the search/all API endpoint which is not limited to the last 3200 tweets, but requires Academic Product Track access.') -@click.argument('infile', type=click.File('r'), default='-') -@click.argument('outfile', type=click.File('w'), default='-') -@click.pass_obj -def timelines(T, infile, outfile, limit, timeline_limit, use_search): +def timeline(T, user_id, outfile, since_id, until_id, start_time, end_time, flatten): """ - Fetch the timelines of every user in an input source of tweets. If - the input is a line oriented text file of user ids or usernames that will - be used instead. + Retrieve the 3200 most recent tweets for the given user. 
""" - total_count = 0 - seen = set() - for line in infile: - line = line.strip() - if line == "": - continue - - users = [] - try: - data = ensure_flattened(json.loads(line)) - users = set([t['author']['id'] for t in ensure_flattened(data)]) - except json.JSONDecodeError: - users = set([line]) - except ValueError: - users = set([line]) - - for user in users: - - # only process a given user once - if user in seen: - continue - seen.add(user) - - # which api endpoint to use - if use_search and since_id: - tweets = T.search_all(f'from:{user}', since_id=since_id) - elif use_search: - tweets = T.search_all(f'from:{user}') - else: - tweets = T.timeline(user) - - timeline_count = 0 - for response in tweets: - _write(response, outfile) - - timeline_count += len(response['data']) - if timeline_limit != 0 and timeline_count >= timeline_limit: - break - - total_count += len(response['data']) - if limit != 0 and total_count >= limit: - return - - -@twarc2.command('conversation') -@click.option('--archive', is_flag=True, default=False, - help='Search the full archive (requires Academic Research track)') -@click.argument('tweet_id', type=str) -@click.argument('outfile', type=click.File('w'), default='-') -@click.pass_obj -@cli_api_error -def conversation(T, tweet_id, archive, outfile): - """ - Retrieve a conversation thread using the tweet id. - """ - q = f'conversation_id:{tweet_id}' - if archive: - search = T.search_all(q) - else: - search = T.search_recent(q) - for resp in search: - _write(resp, outfile) - - -@twarc2.command('conversations') -@click.option('--limit', default=0, help='Maximum number of tweets to return') -@click.option('--conversation-limit', default=0, - help='Maximum number of tweets to return per-conversation') -@click.option('--archive', is_flag=True, default=False, - help='Use the Academic Research project track access to the full archive') -@click.argument('infile', type=click.File('r'), default='-') -@click.argument('outfile', type=click.File('w'), default='-') -@click.pass_obj -@cli_api_error -def conversations(T, infile, outfile, archive, limit, conversation_limit): - """ - Fetch the full conversation threads that the input tweets are a part of. - Alternatively the input can be a line oriented file of conversation ids. - """ - - # keep track of converstation ids that have been fetched so that they - # aren't fetched twice - seen = set() - - # use the archive or recent search? 
- search = T.search_all if archive else T.search_recent - - count = 0 - stop = False - for line in infile: - conv_ids = [] - - # stop will get set when the total tweet limit has been met - if stop: - break - - # get a specific conversation id - line = line.strip() - if re.match(r'^\d+$', line): - if line in seen: - continue - conv_ids = [line] - - # generate all conversation_ids that are referenced in tweets input - else: - def f(): - for tweet in ensure_flattened(json.loads(line)): - yield tweet.get('conversation_id') - conv_ids = f() - - # output results while paying attention to the set limits - conv_count = 0 - - for conv_id in conv_ids: - - if conv_id in seen: - logging.info(f'already fetched conversation_id {conv_id}') - seen.add(conv_id) - - conv_count = 0 - - logging.info(f'fetching conversation {conv_id}') - for result in search(f'conversation_id:{conv_id}'): - _write(result, outfile, False) - - count += len(result['data']) - if limit != 0 and count >= limit: - logging.info(f'reached tweet limit of {limit}') - stop = True - break - - conv_count += len(result['data']) - if conversation_limit !=0 and conv_count >= conversation_limit: - logging.info(f'reached conversation limit {conversation_limit}') - break + for result in T.timeline(user_id, since_id, until_id, start_time, end_time): + _write(result, outfile, flatten) @twarc2.command('flatten') @@ -519,24 +364,25 @@ def f(): @cli_api_error def flatten(infile, outfile): """ - "Flatten" tweets, or move expansions inline with tweet objects and ensure - that each line of output is a single tweet. + "Flatten" tweets, or move expansions inline with tweet objects. """ if (infile.name == outfile.name): click.echo(click.style(f"💔 Cannot flatten files in-place, specify a different output file!", fg='red'), err=True) return for line in infile: - for tweet in ensure_flattened(json.loads(line)): - _write(tweet, outfile, False) + result = json.loads(line) + _write(result, outfile, True) @twarc2.command('stream') @click.option('--limit', default=0, help='Maximum number of tweets to return') +@click.option('--flatten', is_flag=True, default=False, + help='Include expansions inline with tweets, and one line per tweet') @click.argument('outfile', type=click.File('a+'), default='-') @click.pass_obj @cli_api_error -def stream(T, outfile, limit): +def stream(T, flatten, outfile, limit): """ Fetch tweets from the live stream. 
""" @@ -552,7 +398,7 @@ def stream(T, outfile, limit): if limit != 0 and count == limit: logging.info(f'reached limit {limit}') event.set() - _write(result, outfile) + _write(result, outfile, flatten) @twarc2.group() @@ -690,6 +536,17 @@ def _error_str(errors): return click.style("\n".join(parts), fg="red") -def _write(results, outfile, pretty=False): +def _write(results, outfile, flatten, pretty=False): indent = 2 if pretty else None - click.echo(json.dumps(results, indent=indent), file=outfile) + if 'data' in results: + if flatten: + if isinstance(results['data'], list): + for r in flat(results)['data']: + click.echo(json.dumps(r, indent=indent), file=outfile) + else: + r = flat(results)['data'] + click.echo(json.dumps(r, indent=indent), file=outfile) + else: + click.echo(json.dumps(results, indent=indent), file=outfile) + else: + click.echo(json.dumps(results, indent=indent), file=outfile) diff --git a/twarc/config.py b/twarc/config.py deleted file mode 100644 index 3fe2096d..00000000 --- a/twarc/config.py +++ /dev/null @@ -1,16 +0,0 @@ -import logging -import configobj - -# Adapted from click_config_file.configobj_provider so that we can store the -# file path that the config was loaded from in order to log it later. - -log = logging - -class ConfigProvider(): - - def __init__(self): - self.file_path = None - - def __call__(self, file_path, cmd_name): - self.file_path = file_path - return configobj.ConfigObj(file_path, unrepr=True) diff --git a/twarc/expansions.py b/twarc/expansions.py index 22a57c24..64ec6fcc 100644 --- a/twarc/expansions.py +++ b/twarc/expansions.py @@ -1,10 +1,8 @@ """ This module contains a list of the known Twitter V2+ API expansions and fields -for each expansion, and a function flatten() for "flattening" a result set, -including all expansions inline. +for each expansion, and a function for "flattening" a result set, including all +expansions inline -ensure_flattened() can be used in tweet processing programs that need to make -sure that data is flattened. """ from collections import defaultdict @@ -117,13 +115,9 @@ def extract_includes(response, expansion, _id="id"): def flatten(response): """ - Flatten an API response by moving all "included" entities inline with the - tweets they are referenced from. flatten expects an entire page response - from the API (data, includes, meta) and will raise a ValueError if what is - passed in does not appear to be an API response. It will return a list of - dictionaries where each dictionary represents a tweet. Empty objects will - be returned for things that are missing in includes, which can happen when - protected or delete users or tweets are referenced. + Flatten the response. Expects an entire page response from the API (data, + includes, meta) Defaults: Return empty objects for things missing in + includes. Doesn't modify tweets, only adds extra data. 
""" # Users extracted both by id and by username for expanding mentions @@ -197,60 +191,17 @@ def expand_payload(payload): return payload - # First expand the included tweets, before processing actual result tweets: + # First, expand the included tweets, before processing actual result tweets: for included_id, included_tweet in extract_includes(response, "tweets").items(): includes_tweets[included_id] = expand_payload(included_tweet) # Now flatten the list of tweets or an individual tweet - tweets = [] if "data" in response: - data = response['data'] - - if isinstance(data, list): - tweets = expand_payload(response["data"]) - elif isinstance(data, dict): - tweets = [expand_payload(response["data"])] + response["data"] = expand_payload(response["data"]) # Add the __twarc metadata to each tweet if it's a result set - if "__twarc" in response: - for tweet in tweets: + if "__twarc" in response and isinstance(response["data"], list): + for tweet in response["data"]: tweet["__twarc"] = response["__twarc"] - else: - raise ValueError(f'missing data stanza in response: {response}') - - return tweets - - -def ensure_flattened(data): - """ - Will ensure that the supplied data is "flattened". The input data can be a - response from the Twitter API, a list of tweet dictionaries, or a single tweet - dictionary. It will always return a list of tweet dictionaries. A ValueError - will be thrown if the supplied data is not recognizable or it cannot be - flattened. - - ensure_flattened is designed for use in twarc plugins and other tweet - processing applications that want to operate on a stream of tweets, and - examine included entities like users and tweets without hunting and - pecking in the response data. - """ - if isinstance(data, dict) and 'data' in data: - return flatten(data) - - elif isinstance(data, list) and len(data) > 0 and isinstance(data[0], dict): - # if author is present it is already flattened - if 'author' in data[0]: - return data - else: - raise ValueError('unable to flatten list of tweets without original response data: {data}') - - elif isinstance(data, dict) and 'author' in data: - # if author is present it is already flattened - if 'author' in data: - return [data] - else: - raise ValueError(f'unable to flatten tweet dictionary without original response data: {data}') - - else: - raise ValueError(f'cannot flatten unrecognized data: {data}') + return response diff --git a/twarc/handshake.py b/twarc/handshake.py index cafacd0d..dce86c0a 100644 --- a/twarc/handshake.py +++ b/twarc/handshake.py @@ -6,6 +6,7 @@ from requests_oauthlib import OAuth1 from urllib.parse import parse_qs +from getpass import getpass def handshake(): @@ -15,7 +16,7 @@ def handshake(): access_token = "" access_token_secret = "" - bearer_token = input( + bearer_token = getpass( "Please enter your Bearer Token (leave blank to skip to API key configuration): " ) @@ -31,7 +32,7 @@ def handshake(): "Configure API keys and secrets." 
consumer_key = input("Please enter your API key: ") - consumer_secret = input("Please enter your API secret: ") + consumer_secret = getpass("Please enter your API secret: ") # verify that the keys work to get the bearer token url = "https://api.twitter.com/oauth2/token" @@ -95,7 +96,7 @@ def handshake(): screen_name = credentials.get('screen_name')[0] else: access_token = input("Enter your Access Token: ") - access_token_secret = input("Enter your Access Token Secret: ") + access_token_secret = getpass("Enter your Access Token Secret: ") screen_name = "default" return { diff --git a/twarc/version.py b/twarc/version.py index 4f58e967..e0c5a628 100644 --- a/twarc/version.py +++ b/twarc/version.py @@ -1 +1 @@ -version = '2.1.8' +version = '2.0.13' diff --git a/twarc2.py b/twarc2.py new file mode 100644 index 00000000..aa5f2278 --- /dev/null +++ b/twarc2.py @@ -0,0 +1,4 @@ +from twarc.command2 import twarc2 + +if __name__ == "__main__": + twarc2(prog_name="python -m twarc2") diff --git a/utils/source.py b/utils/source.py index 18d87c05..4f9d4284 100755 --- a/utils/source.py +++ b/utils/source.py @@ -5,6 +5,7 @@ Example usage: utils/source.py tweets.jsonl > sources.html """ +from __future__ import print_function import json import fileinput from collections import defaultdict @@ -54,14 +55,14 @@

Twitter client sources

- created on the command line with twarc + created on the command line with twarc
""") for source in sumsort: - print(''.format(source, summary[source])) + print(''.format(source.encode('utf-8'), summary[source])) print(""" @@ -70,7 +71,7 @@


-created on the command line with twarc. +created on the command line with twarc.

diff --git a/utils/wall.py b/utils/wall.py index 836628df..9b865602 100755 --- a/utils/wall.py +++ b/utils/wall.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- """ Feed wall.py your JSON and get a wall of tweets as HTML. If you want to get the @@ -7,6 +8,7 @@ % tail -r tweets.jsonl | ./wall.py > wall.html """ +from __future__ import print_function import os import re @@ -109,7 +111,7 @@ def text(t):

Title Here

- created on the command line with twarc + created on the command line with twarc
@@ -147,10 +149,10 @@ def text(t): "created_at": tweet["created_at"], "name": tweet["user"]["name"], "username": tweet["user"]["screen_name"], - "user_url": "https://twitter.com/" + tweet["user"]["screen_name"], + "user_url": "http://twitter.com/" + tweet["user"]["screen_name"], "text": text(tweet), "avatar": AVATAR_DIR + "/" + filename, - "url": "https://twitter.com/" + tweet["user"]["screen_name"] + "/status/" + tweet["id_str"], + "url": "http://twitter.com/" + tweet["user"]["screen_name"] + "/status/" + tweet["id_str"], } if 'retweet_status' in tweet: @@ -158,19 +160,20 @@ def text(t): else: t['retweet_count'] = tweet.get('retweet_count', 0) - t['favorite_count'] = tweet.get('favorite_count', 0) - t['retweet_string'] = 'retweet' if t['retweet_count'] == 1 else 'retweets' - t['favorite_string'] = 'like' if t['favorite_count'] == 1 else 'likes' + if t['retweet_count'] == 1: + t['retweet_string'] = 'retweet' + else: + t['retweet_string'] = 'retweets' for url in tweet['entities']['urls']: a = '%(url)s' % url start, end = url['indices'] t['text'] = t['text'][0:start] + a + t['text'][end:] - t['text'] = re.sub('@([A-Za-z0-9_]+)', r'@\g<1>', t['text']) - t['text'] = re.sub(' #([^ ]+)', r' #\g<1>', t['text']) + t['text'] = re.sub(' @([^ ]+)', ' @\g<1>', t['text']) + t['text'] = re.sub(' #([^ ]+)', ' #\g<1>', t['text']) - html = """ + html = u"""
%(name)s
@@ -178,13 +181,16 @@ def text(t):
%(text)s

- %(retweet_count)s %(retweet_string)s, %(favorite_count)s %(favorite_string)s
+ %(retweet_count)s %(retweet_string)s
""" % t - print(html) + if sys.version_info.major == 2: + print(html.encode('utf8')) + else: + print(html) print(""" @@ -193,7 +199,7 @@ def text(t):


-created on the command line with twarc. +created on the command line with twarc.

From 1303abcd338e258be338f01534e4e01d2db98e74 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Wed, 27 Oct 2021 11:47:27 +0100 Subject: [PATCH 5/8] Revert "Merge branch 'main' into random-sample" This reverts commit 730e98dab72172a4f148358393905b62d5e9d6cd, reversing changes made to 14c733681c90d02664a5cbd5adae97261db3f863. --- README.md | 2 +- twarc/__main__.py | 5 ----- twarc/command2.py | 14 ++++++-------- twarc2.py | 4 ---- 4 files changed, 7 insertions(+), 18 deletions(-) delete mode 100644 twarc/__main__.py delete mode 100644 twarc2.py diff --git a/README.md b/README.md index c0551a68..51fc74b8 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ If you prefer you can create a page on the [wiki](https://github.com/docnow/twar If you are interested in adding functionality to twarc or fixing something that's broken here are the steps to setting up your development environment: - git clone https://github.com/docnow/twarc + git clone https://github.io/docnow/twarc cd twarc pip install -r requirements.txt diff --git a/twarc/__main__.py b/twarc/__main__.py deleted file mode 100644 index 53f093ef..00000000 --- a/twarc/__main__.py +++ /dev/null @@ -1,5 +0,0 @@ -import twarc.command - -if __name__ == "__main__": - twarc.command.main() - diff --git a/twarc/command2.py b/twarc/command2.py index b1de6f92..c0df34bf 100644 --- a/twarc/command2.py +++ b/twarc/command2.py @@ -150,10 +150,10 @@ def get_version(): help='Match tweets sent prior to tweet id') @click.option('--start-time', type=click.DateTime(formats=('%Y-%m-%d', '%Y-%m-%dT%H:%M:%S')), - help='Match tweets created after UTC time (ISO 8601/RFC 3339), e.g. 2021-01-01T12:31:04') + help='Match tweets created after time (ISO 8601/RFC 3339), e.g. 2021-01-01T12:31:04') @click.option('--end-time', type=click.DateTime(formats=('%Y-%m-%d', '%Y-%m-%dT%H:%M:%S')), - help='Match tweets sent before UTC time (ISO 8601/RFC 3339)') + help='Match tweets sent before time (ISO 8601/RFC 3339)') @click.option('--archive', is_flag=True, default=False, help='Search the full archive (requires Academic Research track)') @click.option('--limit', default=0, help='Maximum number of tweets to save') @@ -388,11 +388,9 @@ def stream(T, flatten, outfile, limit): """ event = threading.Event() count = 0 - click.echo(click.style(f'Started a stream with rules:', fg='green'), - err=True) + click.echo(click.style(f'Started a stream with rules:', fg='green')) _print_stream_rules(T) - click.echo(click.style(f'Writing to {outfile.name}\nCTRL+C to stop...', - fg='green'), err=True) + click.echo(click.style(f'Writing to {outfile.name}\nCTRL+C to stop...', fg='green')) for result in T.stream(event=event): count += 1 if limit != 0 and count == limit: @@ -425,7 +423,7 @@ def _print_stream_rules(T): """ result = T.get_stream_rules() if 'data' not in result or len(result['data']) == 0: - click.echo('No rules yet. Add them with ' + click.style('twarc2 stream-rules add', bold=True), err=True) + click.echo('No rules yet. 
Add them with ' + click.style('twarc2 stream-rules add', bold=True)) else: count = 0 for rule in result['data']: @@ -434,7 +432,7 @@ def _print_stream_rules(T): s = rule['value'] if 'tag' in rule: s += f" (tag: {rule['tag']})" - click.echo(click.style(f'☑ {s}'), err=True) + click.echo(click.style(f'☑ {s}')) count += 1 diff --git a/twarc2.py b/twarc2.py deleted file mode 100644 index aa5f2278..00000000 --- a/twarc2.py +++ /dev/null @@ -1,4 +0,0 @@ -from twarc.command2 import twarc2 - -if __name__ == "__main__": - twarc2(prog_name="python -m twarc2") From 1a8358a0282b4ed464dc8be08d2279e2e5c9e32a Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Wed, 27 Oct 2021 11:47:51 +0100 Subject: [PATCH 6/8] Revert "add and move ts functions to helpers" This reverts commit 14c733681c90d02664a5cbd5adae97261db3f863. --- twarc/client2.py | 11 ++++++----- twarc/helpers.py | 39 ++++++--------------------------------- twarc/version.py | 2 +- 3 files changed, 13 insertions(+), 39 deletions(-) diff --git a/twarc/client2.py b/twarc/client2.py index b7457a6b..ce36501c 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -9,6 +9,7 @@ import json import logging import requests +import datetime import time from oauthlib.oauth2 import BackendApplicationClient @@ -120,9 +121,9 @@ def _search( if until_id: params["until_id"] = until_id if start_time: - params["start_time"] = ts(start_time) + params["start_time"] = _ts(start_time) if end_time: - params["end_time"] = ts(end_time) + params["end_time"] = _ts(end_time) count = 0 made_call = time.monotonic() @@ -486,9 +487,9 @@ def _timeline( if until_id: params["until_id"] = until_id if start_time: - params["start_time"] = ts(start_time) + params["start_time"] = _ts(start_time) if end_time: - params["end_time"] = ts(end_time) + params["end_time"] = _ts(end_time) count = 0 for response in self.get_paginated(url, params=params): @@ -756,6 +757,6 @@ def _append_metadata(result, url): result["__twarc"] = { "url": url, "version": version, - "retrieved_at": utcnow() + "retrieved_at": _utcnow() } return result diff --git a/twarc/helpers.py b/twarc/helpers.py index 11f32098..cfb259d0 100644 --- a/twarc/helpers.py +++ b/twarc/helpers.py @@ -1,10 +1,8 @@ """ Useful functions for converting things into different types """ -import datetime - -def ts(dt): +def _ts(dt): """ Return ISO 8601 / RFC 3339 datetime in UTC. If no timezone is specified it is assumed to be in UTC. The Twitter API does not accept microseconds. @@ -19,40 +17,15 @@ def ts(dt): dt = dt.astimezone(datetime.timezone.utc) else: dt = dt.replace(tzinfo=datetime.timezone.utc) - return dt.isoformat(timespec="seconds") - + return dt.isoformat(timespec='seconds') -def utcnow(): +def _utcnow(): """ Return _now_ in ISO 8601 / RFC 3339 datetime in UTC. Returns: datetime: Current timestamp in UTC. """ - return datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds") - - -def _snowflake2millis(snowflake_id): - return (snowflake_id >> 22) + 1288834974657 - - -def _millis2snowflake(milliseconds): - return (int(milliseconds) - 1288834974657) << 22 - - -def _get_millis(ms): - return ms % 1000 - - -def _sample_windows(start_ts, end_ts, sample_type): - """ - todo: Generate tuples of start and end snowflake ids between two timestamps - - sample_type - type of random sample and millisecond range: - _1% "Spritzer" Sample [657-666] - 10% "Gardenhose" Sample [657-756] - 10% "Enterprise" Sample [*0*] - _1% v2 Sample [?] - _N% v2 Sample [?] 
- """ - pass + return datetime.datetime.now(datetime.timezone.utc).isoformat( + timespec='seconds' + ) diff --git a/twarc/version.py b/twarc/version.py index e0c5a628..9febec5a 100644 --- a/twarc/version.py +++ b/twarc/version.py @@ -1 +1 @@ -version = '2.0.13' +version = '2.0.12' From c458799fa15a2795c091bbc26a086f8426a4c105 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Wed, 27 Oct 2021 11:48:08 +0100 Subject: [PATCH 7/8] Revert "move time related functions to helpers" This reverts commit 05c6f27f5e28a629fc08cdf2c5d2a457f8f81e5d. --- twarc/client2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/twarc/client2.py b/twarc/client2.py index ce36501c..d2e43e37 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -19,7 +19,6 @@ from twarc import expansions from twarc.decorators import * -from twarc.helpers import * from twarc.version import version From 42b9306deb0f8c327921b801fdb4997ae04fbf66 Mon Sep 17 00:00:00 2001 From: Igor Brigadir Date: Wed, 27 Oct 2021 11:48:22 +0100 Subject: [PATCH 8/8] Revert "move time related functions to helpers" This reverts commit 477676c7df1eb9525c1a73c8044c1110caef4350. --- twarc/client2.py | 28 ++++++++++++++++++++++++++++ twarc/helpers.py | 31 ------------------------------- 2 files changed, 28 insertions(+), 31 deletions(-) delete mode 100644 twarc/helpers.py diff --git a/twarc/client2.py b/twarc/client2.py index d2e43e37..2ee2f29f 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -740,6 +740,34 @@ def _ensure_user_id(self, user): else: raise ValueError(f"No such user {user}") +def _ts(dt): + """ + Return ISO 8601 / RFC 3339 datetime in UTC. If no timezone is specified it + is assumed to be in UTC. The Twitter API does not accept microseconds. + + Args: + dt (datetime): a `datetime` object to format. + + Returns: + str: an ISO 8601 / RFC 3339 datetime in UTC. + """ + if dt.tzinfo: + dt = dt.astimezone(datetime.timezone.utc) + else: + dt = dt.replace(tzinfo=datetime.timezone.utc) + return dt.isoformat(timespec='seconds') + +def _utcnow(): + """ + Return _now_ in ISO 8601 / RFC 3339 datetime in UTC. + + Returns: + datetime: Current timestamp in UTC. + """ + return datetime.datetime.now(datetime.timezone.utc).isoformat( + timespec='seconds' + ) + def _append_metadata(result, url): """ Appends `__twarc` metadata to the result. diff --git a/twarc/helpers.py b/twarc/helpers.py deleted file mode 100644 index cfb259d0..00000000 --- a/twarc/helpers.py +++ /dev/null @@ -1,31 +0,0 @@ -""" -Useful functions for converting things into different types -""" - -def _ts(dt): - """ - Return ISO 8601 / RFC 3339 datetime in UTC. If no timezone is specified it - is assumed to be in UTC. The Twitter API does not accept microseconds. - - Args: - dt (datetime): a `datetime` object to format. - - Returns: - str: an ISO 8601 / RFC 3339 datetime in UTC. - """ - if dt.tzinfo: - dt = dt.astimezone(datetime.timezone.utc) - else: - dt = dt.replace(tzinfo=datetime.timezone.utc) - return dt.isoformat(timespec='seconds') - -def _utcnow(): - """ - Return _now_ in ISO 8601 / RFC 3339 datetime in UTC. - - Returns: - datetime: Current timestamp in UTC. - """ - return datetime.datetime.now(datetime.timezone.utc).isoformat( - timespec='seconds' - )
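PATCH 3 also stubs out snowflake helpers for the planned random-sample work: a
tweet ID encodes its creation time in the bits above the low 22 worker/sequence
bits, as a millisecond offset from the snowflake epoch (1288834974657 ms, i.e.
2010-11-04T01:42:54.657Z). A round-trip sketch using the `_snowflake2millis` and
`_millis2snowflake` definitions from that patch; the example tweet ID is
hypothetical:

    import datetime

    TWITTER_EPOCH_MS = 1288834974657  # snowflake epoch, ms since the Unix epoch

    def _snowflake2millis(snowflake_id):
        # Drop the low 22 bits (worker + sequence) and add the snowflake epoch.
        return (snowflake_id >> 22) + TWITTER_EPOCH_MS

    def _millis2snowflake(milliseconds):
        # Inverse mapping; the low 22 bits come back as zeros.
        return (int(milliseconds) - TWITTER_EPOCH_MS) << 22

    tweet_id = 1390000000000000000  # hypothetical ID from early May 2021
    millis = _snowflake2millis(tweet_id)
    created = datetime.datetime.fromtimestamp(millis / 1000, tz=datetime.timezone.utc)
    print(created.isoformat(timespec="seconds"))

    # Converting back zeroes the worker/sequence bits but preserves the timestamp.
    assert _snowflake2millis(_millis2snowflake(millis)) == millis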