Skip to content

Commit

Permalink
Merge pull request #1324 from wkentaro/faster-uploading-of-jsk-data
Browse files Browse the repository at this point in the history
[jsk_data] Directly use drive command locally for faster uploading
  • Loading branch information
garaemon committed Dec 28, 2015
2 parents 1bf583c + 7304206 commit 149b2fa
Show file tree
Hide file tree
Showing 3 changed files with 171 additions and 54 deletions.
Binary file added jsk_data/scripts/drive-linux-x64
Binary file not shown.
132 changes: 78 additions & 54 deletions jsk_data/src/jsk_data/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@
import click
from jsk_tools.cltool import percol_select

from jsk_data.gdrive import delete_gdrive
from jsk_data.gdrive import download_gdrive
from jsk_data.gdrive import info_gdrive
from jsk_data.gdrive import list_gdrive
from jsk_data.gdrive import upload_gdrive
from jsk_data.ssh import connect_ssh
from jsk_data.ssh import get_user_by_hostname
from jsk_data.util import filename_with_timestamp
Expand Down Expand Up @@ -46,30 +51,33 @@ def cli():
def cmd_get(public, query):
"""Download specified file."""
if not query:
candidates = _list_aries_files(public=public)
if public:
lines = list_gdrive().splitlines()
candidates = [l.split()[1] for l in lines]
else:
candidates = _list_aries_files(public=public)
selected = percol_select(candidates)
if len(selected) != 1:
sys.stderr.write('Please select 1 filename.\n')
sys.exit(1)
query = selected[0]
sys.stderr.write('Selected: {0}\n'.format(query))

public_level = 'public' if public else 'private'
cmd = 'rsync -avz --progress -e "ssh -o StrictHostKeyChecking=no"\
--bwlimit=100000 {usr}@{host}:{dir}/{lv}/{q} .'
cmd = cmd.format(usr=LOGIN_USER, host=HOST,
dir=DATA_DIR, lv=public_level, q=query)
subprocess.call(shlex.split(cmd))
if public:
download_gdrive(filename=query)
else:
cmd = 'rsync -avz --progress -e "ssh -o StrictHostKeyChecking=no"\
--bwlimit=100000 {usr}@{host}:{dir}/private/{q} .'
cmd = cmd.format(usr=LOGIN_USER, host=HOST, dir=DATA_DIR, q=query)
subprocess.call(shlex.split(cmd))


def _list_aries_files(public, query=None, ls_options=None):
public_level = 'public' if public else 'private'
def _list_aries_files(query=None, ls_options=None):
query = query or ''
ls_options = ls_options or []
with connect_ssh(HOST, LOGIN_USER) as ssh:
cmd = 'ls {opt} {dir}/{lv}/{q}'
cmd = cmd.format(opt=' '.join(ls_options), dir=DATA_DIR,
lv=public_level, q=query)
cmd = 'ls {opt} {dir}/private/{q}'
cmd = cmd.format(opt=' '.join(ls_options), dir=DATA_DIR, q=query)
_, stdout, _ = ssh.exec_command(cmd)
files = stdout.read().splitlines()
return files
Expand All @@ -96,7 +104,13 @@ def cmd_ls(public, query, show_size, sort, reverse):
if reverse:
ls_options.append('--reverse')

print('\n'.join(_list_aries_files(public, query, ls_options)))
if public:
if ls_options:
sys.stderr.write(
'WARNING: if public=True, ignores all ls options\n')
sys.stdout.write(list_gdrive())
else:
print('\n'.join(_list_aries_files(query, ls_options)))


@cli.command(name='put', help='Upload file to aries.')
Expand All @@ -107,8 +121,6 @@ def cmd_ls(public, query, show_size, sort, reverse):
@click.argument('filename', required=True, type=click.Path(exists=True))
def cmd_put(public, filename):
"""Upload file to aries."""
public_level = 'public' if public else 'private'

filename_org = filename
filename = filename_with_timestamp(filename)
if filename_org != filename:
Expand All @@ -120,31 +132,26 @@ def cmd_put(public, filename):
sys.exit(1)
os.rename(filename_org, filename)

print('Uploading to aries...')
cmd = 'rsync -avz --progress -e "ssh -o StrictHostKeyChecking=no"\
--bwlimit=100000 {file} {usr}@{host}:{dir}/{lv}/'
cmd = cmd.format(file=filename, usr=LOGIN_USER, host=HOST,
dir=DATA_DIR, lv=public_level)
subprocess.call(shlex.split(cmd))
print('Done.')
if public_level == 'private':
sys.exit(0)

print('Uploading to Google Drive...')
with connect_ssh(HOST, LOGIN_USER) as ssh:
cmd = '{dir}/scripts/upload-public-data.sh {dir}/public/{file}'
cmd = cmd.format(dir=DATA_DIR, file=filename)
_, stdout, stderr = ssh.exec_command(cmd)
for line in stdout.readlines():
if public:
print('Uploading to Google Drive...')
stdout = upload_gdrive(filename)
for line in stdout.splitlines():
if line.startswith('Title:'):
filename = line.split(' ')[-1].strip()
elif line.startswith('Id:'):
file_id = line.split(' ')[-1].strip()
sys.stderr.write(stderr.read())
print('Done.')
print('You can download it by:')
dl_url = google_drive_file_url(file_id, download=True)
print('$ wget {url} -O {file}'.format(url=dl_url, file=filename))
print('Done.')
print('You can download it by:')
dl_url = google_drive_file_url(file_id, download=True)
print('$ wget {url} -O {file}'.format(url=dl_url, file=filename))
else:
print('Uploading to aries...')
cmd = 'rsync -avz --progress -e "ssh -o StrictHostKeyChecking=no"\
--bwlimit=100000 {file} {usr}@{host}:{dir}/private/'
cmd = cmd.format(file=filename, usr=LOGIN_USER, host=HOST,
dir=DATA_DIR)
subprocess.call(shlex.split(cmd))
print('Done.')


@cli.command(name='pubinfo', help='Show public data info.')
Expand All @@ -153,28 +160,24 @@ def cmd_put(public, filename):
help='Print out download command')
def cmd_pubinfo(filename, show_dl_cmd):
if not filename:
candidates = _list_aries_files(public=True)
# FIXME: gdrive does not return full title if it is longer than 40
candidates = list_gdrive().splitlines()
selected = percol_select(candidates)
if len(selected) != 1:
sys.stderr.write('Please select 1 filename.\n')
sys.exit(1)
filename = selected[0]

with connect_ssh(HOST, LOGIN_USER) as ssh:
cmd = '{dir}/scripts/list-public-data.sh'.format(dir=DATA_DIR)
_, stdout, stderr = ssh.exec_command(cmd)
stdout.next() # drop header
for line in stdout.readlines():
file_id, title = line.split()[:2]
# FIXME: gdrive does not return full title if it is longer than 40
if len(filename) > 40:
filename = filename[:19] + '...' + filename[-18:]
if filename == title:
break
else:
sys.stderr.write('file not found: {0}\n'.format(filename))
sys.stderr.write('Run `jsk_data ls --public` to find files.\n')
return
filename = selected[0].split()[1]

stdout = list_gdrive()
for line in stdout.splitlines():
file_id, title = line.split()[:2]
if filename == title:
filename = info_gdrive(id=file_id, only_filename=True)
break
else:
sys.stderr.write('file not found: {0}\n'.format(filename))
sys.stderr.write('Run `jsk_data ls --public` to find files.\n')
return

dl_url = google_drive_file_url(file_id, download=True)
if show_dl_cmd:
Expand All @@ -189,3 +192,24 @@ def cmd_pubinfo(filename, show_dl_cmd):
Download URL: {dl_url}'''.format(id=file_id, file=filename,
view_url=view_url, dl_url=dl_url)
print(info)


@cli.command(name='delete', help='Delete specified file.')
@click.option('-p', '--public', is_flag=True, help='Handle public files.')
@click.argument('filename', default='')
def cmd_delete(public, filename):
    """Delete a public file, picking it interactively if none is given.

    Only public files are supported: without ``--public`` an error is
    written to stderr and the process exits with status 1.  When
    ``filename`` is empty, the gdrive listing is offered through
    ``percol_select`` and exactly one row must be chosen.
    """
    if not public:
        sys.stderr.write('ERROR: public=False is not supported\n')
        sys.exit(1)

    if filename:
        chosen = filename
    else:
        # FIXME: gdrive does not return full title if it is longer than 40
        picked = percol_select(list_gdrive().splitlines())
        if len(picked) != 1:
            sys.stderr.write('Please select 1 filename.\n')
            sys.exit(1)
        # The second whitespace-separated column of a listing row is used
        # as the title (the first is taken as the file id elsewhere).
        chosen = picked[0].split()[1]

    delete_gdrive(filename=chosen)
93 changes: 93 additions & 0 deletions jsk_data/src/jsk_data/gdrive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Wrapper module for gdrive command"""

import os
import subprocess
import sys


# directory id in google drive of jsk
DIR_ID = '0B9P1L--7Wd2vUGplQkVLTFBWcFE'


def run_gdrive(args=None, stdout=True):
    """Run the packaged ``drive-linux-x64`` binary through ``rosrun``.

    The command line is assembled as one string and executed with
    ``shell=True``.  The ``--config`` path passed to the binary is
    ``$ROS_HOME/jsk_data/.gdrive`` (``~/.ros`` is used when ROS_HOME is
    not set).

    Args:
        args: argument string appended to the command; None means none.
        stdout: when true, the output is captured with
            ``subprocess.check_output`` and returned; otherwise the
            command is run with ``subprocess.call`` and None is returned.
    """
    extra = '' if args is None else args
    ros_root = os.getenv('ROS_HOME', os.path.expanduser('~/.ros'))
    config_path = os.path.join(ros_root, 'jsk_data', '.gdrive')
    template = 'rosrun jsk_data drive-linux-x64 --config {config} {args}'
    command = template.format(config=config_path, args=extra)
    if not stdout:
        subprocess.call(command, shell=True)
        return None
    return subprocess.check_output(command, shell=True)


def _init_gdrive():
    """Prepare the gdrive config; call this before any gdrive command.

    Returns immediately when ``$ROS_HOME/jsk_data/.gdrive`` already
    exists.  Otherwise ``$ROS_HOME/jsk_data`` is created if missing and
    gdrive is run once with no arguments and without capturing output.
    """
    ros_root = os.getenv('ROS_HOME', os.path.expanduser('~/.ros'))
    package_dir = os.path.join(ros_root, 'jsk_data')
    if os.path.exists(os.path.join(package_dir, '.gdrive')):
        return
    if not os.path.exists(package_dir):
        os.makedirs(package_dir)
    # NOTE(review): presumably the bare invocation lets gdrive do its
    # first-time setup interactively -- confirm against the gdrive tool.
    run_gdrive(stdout=False)


def list_gdrive():
    """Return gdrive's header-less listing of entries whose parent is DIR_ID.

    The result is whatever ``run_gdrive`` captured from the command.
    """
    _init_gdrive()
    parent_query = " '{id}' in parents".format(id=DIR_ID)
    list_args = 'list --query "{query}" --noheader'.format(query=parent_query)
    return run_gdrive(args=list_args)


def info_gdrive(id, only_filename=False):
    """Run ``gdrive info`` for a file id.

    Args:
        id: file id passed to ``info --id``.
        only_filename: when true, return only the title parsed from the
            output instead of the full output.
    """
    _init_gdrive()
    raw_info = run_gdrive(args='info --id {id}'.format(id=id))
    if not only_filename:
        return raw_info
    return _info_gdrive_filename(stdout=raw_info)


def _info_gdrive_filename(stdout):
    """Extract the file title from the text printed by ``gdrive info``.

    Args:
        stdout: output of ``gdrive info`` as text.

    Returns:
        The text following the ``Title: `` prefix on the first line that
        starts with it, with surrounding whitespace stripped, or None when
        no such line exists.
    """
    prefix = 'Title: '
    for line in stdout.splitlines():
        if line.startswith(prefix):
            # Keep the whole remainder rather than the last word only:
            # a title containing spaces must not be truncated.
            return line[len(prefix):].strip()
    return None


def upload_gdrive(filename):
    """Upload a local file into the DIR_ID folder and return gdrive's output.

    Args:
        filename: path of the local file to upload.

    Returns:
        The output captured by ``run_gdrive`` for the upload command.
    """
    # Imported locally so the module's import block stays untouched; the
    # fallback covers Python 2, where the helper lives in ``pipes``.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    _init_gdrive()
    # run_gdrive executes through a shell, so the path must be quoted or a
    # name containing spaces/metacharacters would be split or interpreted.
    args = 'upload --file {file} --parent {id}'.format(
        file=quote(filename), id=DIR_ID)
    return run_gdrive(args=args)


def _find_id_by_filename(filename):
    """Look up the file id of a title in the gdrive listing.

    Titles longer than 40 characters are compared in abbreviated form
    (first 19 characters, ``...``, last 18 characters), since the gdrive
    listing does not return such titles in full.

    Returns:
        The id from the first listing row whose title matches, or None
        after writing a "file not found" message to stderr.
    """
    wanted = filename
    if len(wanted) > 40:
        wanted = wanted[:19] + '...' + wanted[-18:]
    for row in list_gdrive().splitlines():
        columns = row.split()[:2]
        file_id, title = columns
        if title == wanted:
            return file_id
    sys.stderr.write('file not found: {0}\n'.format(wanted))
    sys.stderr.write('Run `jsk_data ls --public` to find files.\n')
    return None


def download_gdrive(filename):
    """Download the file with the given title from the DIR_ID folder.

    Args:
        filename: title of the file as shown in the gdrive listing.

    Returns:
        None.  When no file with that title is found nothing is
        downloaded; the lookup has already reported the failure on stderr.
    """
    _init_gdrive()
    file_id = _find_id_by_filename(filename)
    if file_id is None:
        # Without this guard the command would be run as
        # ``download --id None``.
        return
    args = 'download --id {0}'.format(file_id)
    run_gdrive(args=args)


def delete_gdrive(filename):
    """Delete the file with the given title from the DIR_ID folder.

    Args:
        filename: title of the file as shown in the gdrive listing.

    Returns:
        None.  When no file with that title is found nothing is deleted;
        the lookup has already reported the failure on stderr.
    """
    _init_gdrive()
    file_id = _find_id_by_filename(filename)
    if file_id is None:
        # Without this guard the command would be run as
        # ``delete --id None``.
        return
    args = 'delete --id {0}'.format(file_id)
    run_gdrive(args=args)

0 comments on commit 149b2fa

Please sign in to comment.