Merge pull request #1324 from wkentaro/faster-uploading-of-jsk-data

[jsk_data] Directly use drive command locally for faster uploading
jsk-ros-pkg · Dec 28, 2015 · 149b2fa · 149b2fa
2 parents 1bf583c + 7304206
commit 149b2fa
Show file tree

Hide file tree

Showing 3 changed files with 171 additions and 54 deletions.
diff --git a/jsk_data/scripts/drive-linux-x64 b/jsk_data/scripts/drive-linux-x64
diff --git a/jsk_data/src/jsk_data/cli.py b/jsk_data/src/jsk_data/cli.py
@@ -9,6 +9,11 @@
 import click
 from jsk_tools.cltool import percol_select
 
+from jsk_data.gdrive import delete_gdrive
+from jsk_data.gdrive import download_gdrive
+from jsk_data.gdrive import info_gdrive
+from jsk_data.gdrive import list_gdrive
+from jsk_data.gdrive import upload_gdrive
 from jsk_data.ssh import connect_ssh
 from jsk_data.ssh import get_user_by_hostname
 from jsk_data.util import filename_with_timestamp
@@ -46,30 +51,33 @@ def cli():
 def cmd_get(public, query):
     """Download specified file."""
     if not query:
-        candidates = _list_aries_files(public=public)
+        if public:
+            lines = list_gdrive().splitlines()
+            candidates = [l.split()[1] for l in lines]
+        else:
+            candidates = _list_aries_files(public=public)
         selected = percol_select(candidates)
         if len(selected) != 1:
             sys.stderr.write('Please select 1 filename.\n')
             sys.exit(1)
         query = selected[0]
         sys.stderr.write('Selected: {0}\n'.format(query))
 
-    public_level = 'public' if public else 'private'
-    cmd = 'rsync -avz --progress -e "ssh -o StrictHostKeyChecking=no"\
-           --bwlimit=100000 {usr}@{host}:{dir}/{lv}/{q} .'
-    cmd = cmd.format(usr=LOGIN_USER, host=HOST,
-                     dir=DATA_DIR, lv=public_level, q=query)
-    subprocess.call(shlex.split(cmd))
+    if public:
+        download_gdrive(filename=query)
+    else:
+        cmd = 'rsync -avz --progress -e "ssh -o StrictHostKeyChecking=no"\
+            --bwlimit=100000 {usr}@{host}:{dir}/private/{q} .'
+        cmd = cmd.format(usr=LOGIN_USER, host=HOST, dir=DATA_DIR, q=query)
+        subprocess.call(shlex.split(cmd))
 
 
-def _list_aries_files(public, query=None, ls_options=None):
-    public_level = 'public' if public else 'private'
+def _list_aries_files(query=None, ls_options=None):
     query = query or ''
     ls_options = ls_options or []
     with connect_ssh(HOST, LOGIN_USER) as ssh:
-        cmd = 'ls {opt} {dir}/{lv}/{q}'
-        cmd = cmd.format(opt=' '.join(ls_options), dir=DATA_DIR,
-                         lv=public_level, q=query)
+        cmd = 'ls {opt} {dir}/private/{q}'
+        cmd = cmd.format(opt=' '.join(ls_options), dir=DATA_DIR, q=query)
         _, stdout, _ = ssh.exec_command(cmd)
         files = stdout.read().splitlines()
     return files
@@ -96,7 +104,13 @@ def cmd_ls(public, query, show_size, sort, reverse):
     if reverse:
         ls_options.append('--reverse')
 
-    print('\n'.join(_list_aries_files(public, query, ls_options)))
+    if public:
+        if ls_options:
+            sys.stderr.write(
+                'WARNING: if public=True, ignores all ls options\n')
+        sys.stdout.write(list_gdrive())
+    else:
+        print('\n'.join(_list_aries_files(query, ls_options)))
 
 
 @cli.command(name='put', help='Upload file to aries.')
@@ -107,8 +121,6 @@ def cmd_ls(public, query, show_size, sort, reverse):
 @click.argument('filename', required=True, type=click.Path(exists=True))
 def cmd_put(public, filename):
     """Upload file to aries."""
-    public_level = 'public' if public else 'private'
-
     filename_org = filename
     filename = filename_with_timestamp(filename)
     if filename_org != filename:
@@ -120,31 +132,26 @@ def cmd_put(public, filename):
             sys.exit(1)
         os.rename(filename_org, filename)
 
-    print('Uploading to aries...')
-    cmd = 'rsync -avz --progress -e "ssh -o StrictHostKeyChecking=no"\
-           --bwlimit=100000 {file} {usr}@{host}:{dir}/{lv}/'
-    cmd = cmd.format(file=filename, usr=LOGIN_USER, host=HOST,
-                     dir=DATA_DIR, lv=public_level)
-    subprocess.call(shlex.split(cmd))
-    print('Done.')
-    if public_level == 'private':
-        sys.exit(0)
-
-    print('Uploading to Google Drive...')
-    with connect_ssh(HOST, LOGIN_USER) as ssh:
-        cmd = '{dir}/scripts/upload-public-data.sh {dir}/public/{file}'
-        cmd = cmd.format(dir=DATA_DIR, file=filename)
-        _, stdout, stderr = ssh.exec_command(cmd)
-        for line in stdout.readlines():
+    if public:
+        print('Uploading to Google Drive...')
+        stdout = upload_gdrive(filename)
+        for line in stdout.splitlines():
             if line.startswith('Title:'):
                 filename = line.split(' ')[-1].strip()
             elif line.startswith('Id:'):
                 file_id = line.split(' ')[-1].strip()
-        sys.stderr.write(stderr.read())
-    print('Done.')
-    print('You can download it by:')
-    dl_url = google_drive_file_url(file_id, download=True)
-    print('$ wget {url} -O {file}'.format(url=dl_url, file=filename))
+        print('Done.')
+        print('You can download it by:')
+        dl_url = google_drive_file_url(file_id, download=True)
+        print('$ wget {url} -O {file}'.format(url=dl_url, file=filename))
+    else:
+        print('Uploading to aries...')
+        cmd = 'rsync -avz --progress -e "ssh -o StrictHostKeyChecking=no"\
+            --bwlimit=100000 {file} {usr}@{host}:{dir}/private/'
+        cmd = cmd.format(file=filename, usr=LOGIN_USER, host=HOST,
+                         dir=DATA_DIR)
+        subprocess.call(shlex.split(cmd))
+        print('Done.')
 
 
 @cli.command(name='pubinfo', help='Show public data info.')
@@ -153,28 +160,24 @@ def cmd_put(public, filename):
               help='Print out download command')
 def cmd_pubinfo(filename, show_dl_cmd):
     if not filename:
-        candidates = _list_aries_files(public=True)
+        # FIXME: gdrive does not return full title if it is longer than 40
+        candidates = list_gdrive().splitlines()
         selected = percol_select(candidates)
         if len(selected) != 1:
             sys.stderr.write('Please select 1 filename.\n')
             sys.exit(1)
-        filename = selected[0]
-
-    with connect_ssh(HOST, LOGIN_USER) as ssh:
-        cmd = '{dir}/scripts/list-public-data.sh'.format(dir=DATA_DIR)
-        _, stdout, stderr = ssh.exec_command(cmd)
-        stdout.next()  # drop header
-        for line in stdout.readlines():
-            file_id, title = line.split()[:2]
-            # FIXME: gdrive does not return full title if it is longer than 40
-            if len(filename) > 40:
-                filename = filename[:19] + '...' + filename[-18:]
-            if filename == title:
-                break
-        else:
-            sys.stderr.write('file not found: {0}\n'.format(filename))
-            sys.stderr.write('Run `jsk_data ls --public` to find files.\n')
-            return
+        filename = selected[0].split()[1]
+
+    stdout = list_gdrive()
+    for line in stdout.splitlines():
+        file_id, title = line.split()[:2]
+        if filename == title:
+            filename = info_gdrive(id=file_id, only_filename=True)
+            break
+    else:
+        sys.stderr.write('file not found: {0}\n'.format(filename))
+        sys.stderr.write('Run `jsk_data ls --public` to find files.\n')
+        return
 
     dl_url = google_drive_file_url(file_id, download=True)
     if show_dl_cmd:
@@ -189,3 +192,24 @@ def cmd_pubinfo(filename, show_dl_cmd):
 Download URL: {dl_url}'''.format(id=file_id, file=filename,
                                  view_url=view_url, dl_url=dl_url)
         print(info)
+
+
+@cli.command(name='delete', help='Delete specified file.')
+@click.option('-p', '--public', is_flag=True, help='Handle public files.')
+@click.argument('filename', default='')
+def cmd_delete(public, filename):
+    """Delete specified file."""
+    if not public:
+        sys.stderr.write('ERROR: public=False is not supported\n')
+        sys.exit(1)
+
+    if not filename:
+        # FIXME: gdrive does not return full title if it is longer than 40
+        candidates = list_gdrive().splitlines()
+        selected = percol_select(candidates)
+        if len(selected) != 1:
+            sys.stderr.write('Please select 1 filename.\n')
+            sys.exit(1)
+        filename = selected[0].split()[1]
+
+    delete_gdrive(filename=filename)
diff --git a/jsk_data/src/jsk_data/gdrive.py b/jsk_data/src/jsk_data/gdrive.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""Wrapper module for gdrive command"""
+
+import os
+import subprocess
+import sys
+
+
+# directory id in google drive of jsk
+DIR_ID = '0B9P1L--7Wd2vUGplQkVLTFBWcFE'
+
+
+def run_gdrive(args=None, stdout=True):
+    if args is None:
+        args = ''
+    ros_home = os.getenv('ROS_HOME', os.path.expanduser('~/.ros'))
+    pkg_ros_home = os.path.join(ros_home, 'jsk_data')
+    config = os.path.join(pkg_ros_home, '.gdrive')
+    cmd = 'rosrun jsk_data drive-linux-x64 --config {config} {args}'\
+          .format(args=args, config=config)
+    if stdout:
+        return subprocess.check_output(cmd, shell=True)
+    else:
+        subprocess.call(cmd, shell=True)
+
+
+def _init_gdrive():
+    """This should be called before any commands with gdrive"""
+    ros_home = os.getenv('ROS_HOME', os.path.expanduser('~/.ros'))
+    pkg_ros_home = os.path.join(ros_home, 'jsk_data')
+    config = os.path.join(pkg_ros_home, '.gdrive')
+    if os.path.exists(config):
+        return
+    if not os.path.exists(pkg_ros_home):
+        os.makedirs(pkg_ros_home)
+    run_gdrive(stdout=False)
+
+
+def list_gdrive():
+    _init_gdrive()
+    args = '''list --query " '{id}' in parents" --noheader'''.format(id=DIR_ID)
+    return run_gdrive(args=args)
+
+
+def info_gdrive(id, only_filename=False):
+    _init_gdrive()
+    args = 'info --id {id}'.format(id=id)
+    info = run_gdrive(args=args)
+    if only_filename:
+        return _info_gdrive_filename(stdout=info)
+    return info
+
+
+def _info_gdrive_filename(stdout):
+    for line in stdout.splitlines():
+        if line.startswith('Title: '):
+            return line.split()[-1]
+
+
+def upload_gdrive(filename):
+    _init_gdrive()
+    args = 'upload --file {file} --parent {id}'.format(file=filename,
+                                                       id=DIR_ID)
+    return run_gdrive(args=args)
+
+
+def _find_id_by_filename(filename):
+    if len(filename) > 40:
+        filename = filename[:19] + '...' + filename[-18:]
+    for line in list_gdrive().splitlines():
+        file_id, title = line.split()[:2]
+        if filename == title:
+            return file_id
+    else:
+        sys.stderr.write('file not found: {0}\n'.format(filename))
+        sys.stderr.write('Run `jsk_data ls --public` to find files.\n')
+        return
+
+
+def download_gdrive(filename):
+    _init_gdrive()
+    file_id = _find_id_by_filename(filename)
+    args = 'download --id {}'.format(file_id)
+    run_gdrive(args=args)
+
+
+def delete_gdrive(filename):
+    _init_gdrive()
+    file_id = _find_id_by_filename(filename)
+    args = 'delete --id {}'.format(file_id)
+    run_gdrive(args=args)