Skip to content

Commit

Permalink
Merge pull request #930 from kyleknap/custom-sync
Browse files Browse the repository at this point in the history
Allow for easy creation of custom sync strategies
  • Loading branch information
kyleknap committed Oct 14, 2014
2 parents b8a493a + 5002f0c commit 5ce60be
Show file tree
Hide file tree
Showing 20 changed files with 1,270 additions and 378 deletions.
33 changes: 24 additions & 9 deletions awscli/customizations/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,14 +120,16 @@ class BasicCommand(CLICommand):

def __init__(self, session):
self._session = session
self._arg_table = None
self._subcommand_table = None

def __call__(self, args, parsed_globals):
# args is the remaining unparsed args.
# We might be able to parse these args so we need to create
# an arg parser and parse them.
subcommand_table = self._build_subcommand_table()
arg_table = self.arg_table
parser = ArgTableArgParser(arg_table, subcommand_table)
self._subcommand_table = self._build_subcommand_table()
self._arg_table = self._build_arg_table()
parser = ArgTableArgParser(self.arg_table, self.subcommand_table)
parsed_args, remaining = parser.parse_known_args(args)

# Unpack arguments
Expand All @@ -138,8 +140,8 @@ def __call__(self, args, parsed_globals):
# as these are how the parameters are stored in the
# `arg_table`.
xformed = key.replace('_', '-')
if xformed in arg_table:
cli_argument = arg_table[xformed]
if xformed in self.arg_table:
cli_argument = self.arg_table[xformed]

value = unpack_argument(
self._session,
Expand Down Expand Up @@ -178,8 +180,8 @@ def __call__(self, args, parsed_globals):
raise ValueError("Unknown options: %s" % ','.join(remaining))
return self._run_main(parsed_args, parsed_globals)
else:
return subcommand_table[parsed_args.subcommand](remaining,
parsed_globals)
return self.subcommand_table[parsed_args.subcommand](remaining,
parsed_globals)

def _validate_value_against_schema(self, model, value):
validate_parameters(value, model)
Expand Down Expand Up @@ -233,9 +235,10 @@ def create_help_command_table(self):
commands[command['name']] = command['command_class'](self._session)
return commands

@property
def arg_table(self):
def _build_arg_table(self):
arg_table = OrderedDict()
self._session.emit('building-arg-table.%s' % self.NAME,
arg_table=self.ARG_TABLE)
for arg_data in self.ARG_TABLE:

# If a custom schema was passed in, create the argument_model
Expand All @@ -249,6 +252,18 @@ def arg_table(self):
arg_table[arg_data['name']] = custom_argument
return arg_table

@property
def arg_table(self):
if self._arg_table is None:
self._arg_table = self._build_arg_table()
return self._arg_table

@property
def subcommand_table(self):
if self._subcommand_table is None:
self._subcommand_table = self._build_subcommand_table()
return self._subcommand_table

@classmethod
def add_command(cls, command_table, session, **kwargs):
command_table[cls.NAME] = cls(session)
Expand Down
110 changes: 20 additions & 90 deletions awscli/customizations/s3/comparator.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,30 +17,17 @@
LOG = logging.getLogger(__name__)


def total_seconds(td):
"""
timedelta's time_seconds() function for python 2.6 users
"""
return (td.microseconds + (td.seconds + td.days * 24 *
3600) * 10**6) / 10**6


class Comparator(object):
"""
This class performs all of the comparisons behind the sync operation
"""
def __init__(self, params=None):
self.delete = False
if 'delete' in params:
self.delete = params['delete']

self.compare_on_size_only = False
if 'size_only' in params:
self.compare_on_size_only = params['size_only']

self.match_exact_timestamps = False
if 'exact_timestamps' in params:
self.match_exact_timestamps = params['exact_timestamps']
def __init__(self, file_at_src_and_dest_sync_strategy,
file_not_at_dest_sync_strategy,
file_not_at_src_sync_strategy):

self._sync_strategy = file_at_src_and_dest_sync_strategy
self._not_at_dest_sync_strategy = file_not_at_dest_sync_strategy
self._not_at_src_sync_strategy = file_not_at_src_sync_strategy

def call(self, src_files, dest_files):
"""
Expand Down Expand Up @@ -107,63 +94,39 @@ def call(self, src_files, dest_files):
compare_keys = self.compare_comp_key(src_file, dest_file)

if compare_keys == 'equal':
same_size = self.compare_size(src_file, dest_file)
same_last_modified_time = self.compare_time(src_file, dest_file)

if self.compare_on_size_only:
should_sync = not same_size
else:
should_sync = (not same_size) or (not same_last_modified_time)

should_sync = self._sync_strategy.determine_should_sync(
src_file, dest_file
)
if should_sync:
LOG.debug("syncing: %s -> %s, size_changed: %s, "
"last_modified_time_changed: %s",
src_file.src, src_file.dest,
not same_size, not same_last_modified_time)
yield src_file
elif compare_keys == 'less_than':
src_take = True
dest_take = False
LOG.debug("syncing: %s -> %s, file does not exist at destination",
src_file.src, src_file.dest)
yield src_file
should_sync = self._not_at_dest_sync_strategy.determine_should_sync(src_file, None)
if should_sync:
yield src_file

elif compare_keys == 'greater_than':
src_take = False
dest_take = True
dest_file.operation_name = 'delete'
if self.delete:
LOG.debug("syncing: (None) -> %s (remove), file does "
"not exist at source (%s) and delete "
"mode enabled",
dest_file.src, dest_file.dest)
should_sync = self._not_at_src_sync_strategy.determine_should_sync(None, dest_file)
if should_sync:
yield dest_file

elif (not src_done) and dest_done:
src_take = True
LOG.debug("syncing: %s -> %s, file does not exist "
"at destination",
src_file.src, src_file.dest)
yield src_file
should_sync = self._not_at_dest_sync_strategy.determine_should_sync(src_file, None)
if should_sync:
yield src_file

elif src_done and (not dest_done):
dest_take = True
dest_file.operation_name = 'delete'
if self.delete:
LOG.debug("syncing: (None) -> %s (remove), file does not "
"exist at source (%s) and delete mode enabled",
dest_file.src, dest_file.dest)
should_sync = self._not_at_src_sync_strategy.determine_should_sync(None, dest_file)
if should_sync:
yield dest_file
else:
break

def compare_size(self, src_file, dest_file):
"""
:returns: True if the sizes are the same.
False otherwise.
"""
return src_file.size == dest_file.size

def compare_comp_key(self, src_file, dest_file):
"""
Determines if the source compare_key is less than, equal to,
Expand All @@ -180,36 +143,3 @@ def compare_comp_key(self, src_file, dest_file):

else:
return 'greater_than'

def compare_time(self, src_file, dest_file):
"""
:returns: True if the file does not need updating based on time of
last modification and type of operation.
False if the file does need updating based on the time of
last modification and type of operation.
"""
src_time = src_file.last_update
dest_time = dest_file.last_update
delta = dest_time - src_time
cmd = src_file.operation_name
if cmd == "upload" or cmd == "copy":
if total_seconds(delta) >= 0:
# Destination is newer than source.
return True
else:
# Destination is older than source, so
# we have a more recently updated file
# at the source location.
return False
elif cmd == "download":
if self.match_exact_timestamps:
# An update is needed unless the
# timestamps match exactly.
return total_seconds(delta) == 0

if total_seconds(delta) <= 0:
return True
else:
# delta is positive, so the destination
# is newer than the source.
return False
3 changes: 3 additions & 0 deletions awscli/customizations/s3/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from awscli.customizations.commands import BasicCommand
from awscli.customizations.s3.subcommands import ListCommand, WebsiteCommand, \
CpCommand, MvCommand, RmCommand, SyncCommand, MbCommand, RbCommand
from awscli.customizations.s3.syncstrategy.register import \
register_sync_strategies


def awscli_initialize(cli):
Expand All @@ -24,6 +26,7 @@ def awscli_initialize(cli):
file
"""
cli.register("building-command-table.main", add_s3)
cli.register('building-command-table.sync', register_sync_strategies)


def s3_plugin_initialize(event_handlers):
Expand Down
56 changes: 34 additions & 22 deletions awscli/customizations/s3/subcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
from awscli.customizations.s3.s3handler import S3Handler, S3StreamHandler
from awscli.customizations.s3.utils import find_bucket_key, uni_print, \
AppendFilter, find_dest_path_comp_key
from awscli.customizations.s3.syncstrategy.base import MissingFileSync, \
SizeAndLastModifiedSync, NeverSync



RECURSIVE = {'name': 'recursive', 'action': 'store_true', 'dest': 'dir_op',
Expand All @@ -40,11 +43,6 @@
"Displays the operations that would be performed using the "
"specified command without actually running them.")}

DELETE = {'name': 'delete', 'action': 'store_true',
'help_text': (
"Files that exist in the destination but not in the source are "
"deleted during sync.")}

QUIET = {'name': 'quiet', 'action': 'store_true',
'help_text': (
"Does not display the operations performed from the specified "
Expand Down Expand Up @@ -178,19 +176,6 @@
EXPIRES = {'name': 'expires', 'nargs': 1, 'help_text': ("The date and time at "
"which the object is no longer cacheable.")}

SIZE_ONLY = {'name': 'size-only', 'action': 'store_true',
'help_text': (
'Makes the size of each key the only criteria used to '
'decide whether to sync from source to destination.')}

EXACT_TIMESTAMPS = {'name': 'exact-timestamps', 'action': 'store_true',
'help_text': (
'When syncing from S3 to local, same-sized '
'items will be ignored only when the timestamps '
'match exactly. The default behavior is to ignore '
'same-sized items unless the local version is newer '
'than the S3 version.')}

INDEX_DOCUMENT = {'name': 'index-document',
'help_text': (
'A suffix that is appended to a request that is for '
Expand Down Expand Up @@ -226,8 +211,6 @@
CACHE_CONTROL, CONTENT_DISPOSITION, CONTENT_ENCODING,
CONTENT_LANGUAGE, EXPIRES, SOURCE_REGION, ONLY_SHOW_ERRORS]

SYNC_ARGS = [DELETE, EXACT_TIMESTAMPS, SIZE_ONLY] + TRANSFER_ARGS


def get_endpoint(service, region, endpoint_url, verify):
return service.get_endpoint(region_name=region, endpoint_url=endpoint_url,
Expand Down Expand Up @@ -455,7 +438,7 @@ class SyncCommand(S3TransferCommand):
USAGE = "<LocalPath> <S3Path> or <S3Path> " \
"<LocalPath> or <S3Path> <S3Path>"
ARG_TABLE = [{'name': 'paths', 'nargs': 2, 'positional_arg': True,
'synopsis': USAGE}] + SYNC_ARGS
'synopsis': USAGE}] + TRANSFER_ARGS
EXAMPLES = BasicCommand.FROM_FILE('s3/sync.rst')


Expand Down Expand Up @@ -535,6 +518,33 @@ def needs_filegenerator(self):
return False
else:
return True

def choose_sync_strategies(self):
"""Determines the sync strategy for the command.
It defaults to the default sync strategies but a customizable sync
strategy can overide the default strategy if it returns the instance
of its self when the event is emitted.
"""
sync_strategies = {}
# Set the default strategies.
sync_strategies['file_at_src_and_dest_sync_strategy'] = \
SizeAndLastModifiedSync()
sync_strategies['file_not_at_dest_sync_strategy'] = MissingFileSync()
sync_strategies['file_not_at_src_sync_strategy'] = NeverSync()

# Determine what strategies to overide if any.
responses = self.session.emit(
'choosing-s3-sync-strategy', params=self.parameters)
if responses is not None:
for response in responses:
override_sync_strategy = response[1]
if override_sync_strategy is not None:
sync_type = override_sync_strategy.sync_type
sync_type += '_sync_strategy'
sync_strategies[sync_type] = override_sync_strategy

return sync_strategies

def run(self):
"""
Expand Down Expand Up @@ -609,14 +619,16 @@ def run(self):
s3_stream_handler = S3StreamHandler(self.session, self.parameters,
result_queue=result_queue)

sync_strategies = self.choose_sync_strategies()

command_dict = {}
if self.cmd == 'sync':
command_dict = {'setup': [files, rev_files],
'file_generator': [file_generator,
rev_generator],
'filters': [create_filter(self.parameters),
create_filter(self.parameters)],
'comparator': [Comparator(self.parameters)],
'comparator': [Comparator(**sync_strategies)],
'file_info_builder': [file_info_builder],
's3_handler': [s3handler]}
elif self.cmd == 'cp' and self.parameters['is_stream']:
Expand Down
12 changes: 12 additions & 0 deletions awscli/customizations/s3/syncstrategy/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Copyright 2013 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
Loading

0 comments on commit 5ce60be

Please sign in to comment.