From 2d4e3558497b09db83652adbf753b1f9397fa629 Mon Sep 17 00:00:00 2001
From: David Lord
Date: Wed, 25 Oct 2017 14:20:28 +1000
Subject: [PATCH 1/2] Initial implementation of aiohttp export

Makes the export script require Python 3.6. (async/await were added in 3.5,
so maybe there?)

I'll need to tidy it up a bit, but this is v1 of the functionality.

Adds -r/--concurrent-requests for throttling. Defaults to 200.
(configured in .env as CONCURRENT_REQUESTS)

Caveats:

- Adds an aiohttp dependency.
- Stops using upload._session, effectively duplicating that functionality
  to get access to aiohttp.ClientSession.
- Adds logging to record the files downloaded. Previously silent.

I've also noticed a bug in filename parsing where a bunch of files all named
`apple.png` are created. This script parses the URL to retrieve the filename,
which exposes this duplication. My version does more parsing of the HTML to
detect the :emoji_name: as used by Slack clients. Currently I'm not
addressing this.
---
 .env.example     |  1 +
 export.py        | 78 +++++++++++++++++++++++++++++++++++-------------
 requirements.txt |  1 +
 3 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/.env.example b/.env.example
index 64edf06e..b0eea4ad 100644
--- a/.env.example
+++ b/.env.example
@@ -2,3 +2,4 @@ export SLACK_TEAM=
 export SLACK_COOKIE=
 export EMOJI_NAME_PREFIX=
 export EMOJI_NAME_SUFFIX=
+export CONCURRENT_REQUESTS=
\ No newline at end of file
diff --git a/export.py b/export.py
index 2bd49e64..84ae994c 100755
--- a/export.py
+++ b/export.py
@@ -3,17 +3,22 @@
 
 # Export emoji in a Slack team as files
 # https://github.com/smashwilson/slack-emojinator
 
-from __future__ import print_function
-
 import requests
 import lxml.html
 import argparse
 import os
 import shutil
+import asyncio, aiohttp
+import logging
 
 from upload import _session
 
+logging.basicConfig(level=logging.INFO, format="%(asctime)-15s\t%(message)s")
+logger = logging.getLogger(__name__)
+
+URL = "https://{team_name}.slack.com/customize/emoji"
+
 
 def _argparse():
     parser = argparse.ArgumentParser(
@@ -33,32 +38,65 @@ def _argparse():
         default=os.getenv('SLACK_COOKIE'),
         help='Defaults to the $SLACK_COOKIE environment variable.'
     )
+    parser.add_argument(
+        '--concurrent-requests', '-r',
+        default=os.getenv('CONCURRENT_REQUESTS', 200),
+        help='Maximum concurrent requests. Defaults to the $CONCURRENT_REQUESTS environment variable or 200.'
+    )
     args = parser.parse_args()
     return args
 
+def concurrent_http_get(num_chunks: int, session: aiohttp.ClientSession):
+    semaphore = asyncio.Semaphore(num_chunks)
+
+    async def http_get(url, name):
+        nonlocal semaphore
+        with (await semaphore):
+            response = await session.get(url)
+            body = await response.content.read()
+            await response.wait_for_close()
+        return body, name, url
+    return http_get
 
-def main():
+def handle_response(response, name: str, url: str, directory: str):
+    logger.info(f"Got {name.ljust(15)} {url}")
+    ext = url.split(".")[-1]
+    with open(os.path.join(directory, f"{name}.{ext}"), 'wb') as out:
+        out.write(response)
+
+def _async_session(auth_cookie):
+    return aiohttp.ClientSession(headers={"Cookie": auth_cookie})
+
+async def main():
     args = _argparse()
     if not os.path.exists(args.directory):
         os.makedirs(args.directory)
 
-    session = _session(args)
-    resp = session.get(session.url)
-    tree = lxml.html.fromstring(resp.text)
-    urls = tree.xpath(r'//td[@headers="custom_emoji_image"]/span/@data-original')
-    names = [u.split('/')[-2] for u in urls]
-
-    for emoji_name, emoji_url in zip(names, urls):
-        if "alias" not in emoji_url: # this does not seem necessary ...
-            file_extension = emoji_url.split(".")[-1]
-            request = session.get(emoji_url, stream=True)
-            if request.status_code == 200:
-                filename = '%s/%s.%s' % (args.directory, emoji_name,
-                                         file_extension)
-                with open(filename, 'wb') as out_file:
-                    shutil.copyfileobj(request.raw, out_file)
-                del request
+    async with _async_session(args.cookie) as session:
+        endpoint = URL.format(team_name=args.team_name)
+        logger.info(f"Getting {endpoint}")
+        resp = await session.get(endpoint)
+        async with resp:
+            if resp.status != 200:
+                logger.error(f"Failed to retrieve emoji list ({resp.status})")
+                return
+            text = await resp.text()
+        tree = lxml.html.fromstring(text)
+        urls = tree.xpath(r'//td[@headers="custom_emoji_image"]/span/@data-original')
+        names = [u.split('/')[-2] for u in urls]
+
+        logger.info(f"Parsed {len(names)} emojis")
+        assert len(names) > 0
+
+        http_get = concurrent_http_get(args.concurrent_requests, session)
+        tasks = [http_get(emoji_url, emoji_name) for emoji_name, emoji_url in zip(names, urls) if "alias" not in emoji_url]
+        for future in asyncio.as_completed(tasks):
+            data, name, url = await future
+            handle_response(data, name, url, args.directory)
+
 
 if __name__ == '__main__':
-    main()
+    loop = asyncio.get_event_loop()
+    loop.run_until_complete(main())
+
diff --git a/requirements.txt b/requirements.txt
index 50143c2c..afce5421 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 beautifulsoup4>=4.4, <5.0
 requests>=2.5.3, <3.0
 lxml==3.7.3
+aiohttp==2.3.1
\ No newline at end of file

From 8bfcb401a3c77e6a494cf362ac746dd17f7e93ea Mon Sep 17 00:00:00 2001
From: David Lord
Date: Wed, 24 Jan 2018 16:54:03 +1100
Subject: [PATCH 2/2] Add aiohttp to Pipfile

---
 Pipfile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Pipfile b/Pipfile
index ecb68e7f..c240e21d 100644
--- a/Pipfile
+++ b/Pipfile
@@ -14,3 +14,4 @@ name = "pypi"
 "beautifulsoup4" = "<5.0,>=4.4"
 requests = "<3.0,>=2.5.3"
 lxml = "*"
+aiohttp = ">2.3.0"
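
Note on the apple.png caveat from the first commit message (not part of the
patches above): the emoji short name could be read out of the customize/emoji
table itself instead of the image URL. A minimal sketch of that idea follows.
The only piece taken from the existing code is the custom_emoji_image /
data-original XPath; the custom_emoji_name header and the row-based layout are
assumptions about Slack's markup and would need checking against a real page.

import lxml.html


def parse_emoji_table(page_html):
    """Yield (name, url) pairs for each custom emoji row on the page.

    Takes the emoji name from the table row rather than from the image URL,
    so several emoji that share one image file (e.g. apple.png) no longer
    collapse onto the same filename.
    """
    tree = lxml.html.fromstring(page_html)
    # Walking up to the enclosing <tr> and reading a name cell is assumed,
    # not verified against Slack's current markup.
    for row in tree.xpath('//tr[.//td[@headers="custom_emoji_image"]]'):
        urls = row.xpath('.//td[@headers="custom_emoji_image"]/span/@data-original')
        names = row.xpath('.//td[@headers="custom_emoji_name"]//text()')
        if not urls or not names:
            continue
        # Slack clients display the name as ":apple:"; strip the colons.
        name = names[0].strip().strip(':')
        yield name, urls[0]

main() could then build each filename as f"{name}.{url.split('.')[-1]}" from
these pairs instead of re-deriving the name with u.split('/')[-2].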