Skip to content

Commit

Permalink
version 0.6.1
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Chase committed Nov 10, 2014
1 parent 8018295 commit 85bf726
Show file tree
Hide file tree
Showing 44 changed files with 7,578 additions and 14 deletions.
4 changes: 2 additions & 2 deletions BitcasaFileFetcher/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def parse(self):
help="Log file download progress every 60 secs")
mainparser.add_argument(
'--version', help="Displays version and exits",
action='version', version='%(prog)s 0.6.0')
action='version', version='%(prog)s 0.6.1')

downparser = subparsers.add_parser("download", parents=[mainparser],
help="Program to download files from bitcasa to local/network storage")
Expand Down Expand Up @@ -160,7 +160,7 @@ def main():
from helpers import utils
from getfiles import BitcasaDownload
from lib import BitcasaUtils
from lib.gdrive import GoogleDrive
from lib.gdrive import GoogleDrive

if args.run_level == Args.RUN_LEVEL_MAIN:
bitcasa_utils = BitcasaUtils()
Expand Down
2 changes: 1 addition & 1 deletion BitcasaFileFetcher/threads/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def download(args):
continue

if command_args.temp:
filehash = sha1("blob " + str(size_bytes) + "\0" + temp_file)
filehash = sha1("blob " + str(size_bytes) + "\0" + item["filepath"])
tmpname = filehash.hexdigest()
temp_file = os.path.join(command_args.temp, tmpname)
item["temppath"] = temp_file
Expand Down
3 changes: 2 additions & 1 deletion BitcasaFileFetcher/threads/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ def upload(queue, should_exit, completed_uploads, results, args):
parent_id = item["filedir"]

log.info("Uploading %s %s", filename, size_str)
retriesleft = 3
retriesleft = 10
while retriesleft > 0 and not should_exit.is_set():
g.get_service()
try:
st = time.time()
timespan = 0
Expand Down
9 changes: 8 additions & 1 deletion CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,11 @@
* Better handling of shutdown
* Resume interrupted downloads
* Better handling of api limit errors when downloading
* Tons more, it's late, I'm going to sleep LOL
* Tons more, it's late, I'm going to sleep LOL

## Version 0.6.1
* Using chardet to hopefully detect the proper encoding for file names
* Fixed a bug in the gdrive wrapper
* Set upload and download retry to 10
* During upload google drive will refresh the auth token if needed
* Use base64 path for sha1 hash to prevent unicode errors
13 changes: 5 additions & 8 deletions includes/helpers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
CLIENTSECRET = "5669c999ac340185a7c80c28d12a4319"
REDIRECT_URI = "http://localhost:1115/bitcasafilelister/auth/"

import math, os, hashlib, logging, tempfile
import math, os, hashlib, logging, tempfile, chardet
BITCASA_TOKEN = os.path.abspath("bitcasa.ini")
GDRIVE_CREDS = os.path.abspath("gdrive.ini")
GDRIVE_SECRETS = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../lib", "gdrive_secrets.ini"))
Expand Down Expand Up @@ -64,10 +64,7 @@ def md5sum(filename, blocksize=65536):
return hasher.hexdigest()

def get_decoded_name(nm):
    """Return ``nm`` as a UTF-8 encoded, filesystem-safe file name.

    Characters that are not allowed in file names on common platforms
    (``\\ / : * ? < > | % "``) are removed and surrounding whitespace is
    stripped. Byte-string names are then decoded with the encoding guessed
    by ``chardet`` and re-encoded as UTF-8.

    :param nm: File name, either a byte string or already-decoded text.
    :returns: The cleaned name encoded as UTF-8.
    :raises UnicodeDecodeError: If the bytes cannot be decoded with the
        guessed encoding.
    :raises LookupError: If the guessed encoding is unknown to Python.
    """
    nm = "".join(i for i in nm if i not in "\\/:*?<>|%\"")
    nm = nm.strip()

    # chardet.detect() rejects text input with ValueError; text needs no
    # guessing anyway, so it is simply encoded.
    if not isinstance(nm, bytes):
        return nm.encode('utf-8')

    # The guessed encoding is None when chardet has nothing to go on (for
    # example an empty name); fall back to UTF-8 instead of passing None to
    # decode(), which would raise TypeError.
    encoding = chardet.detect(nm)["encoding"] or 'utf-8'
    return nm.decode(encoding).encode('utf-8')
32 changes: 32 additions & 0 deletions includes/lib/chardet/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
######################## BEGIN LICENSE BLOCK ########################
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################

__version__ = "2.3.0"
from sys import version_info


def detect(aBuf):
    """Guess the character encoding of a byte string.

    The whole buffer is fed to a fresh ``UniversalDetector`` and the
    detector's ``result`` is returned once it has been closed.

    :param aBuf: Raw bytes to analyse. A ``bytearray`` is also accepted and
        is converted to an immutable byte string first.
    :returns: The ``result`` attribute of the detector.
    :raises ValueError: If ``aBuf`` is text instead of bytes (``unicode`` on
        Python 2, anything that is not ``bytes`` on Python 3).
    """
    # A bytearray holds the same data as bytes; normalise it so the type
    # check below does not reject it with a misleading "unicode" error.
    if isinstance(aBuf, bytearray):
        aBuf = bytes(aBuf)

    # ``unicode`` only exists on Python 2; the version test short-circuits
    # so the name is never evaluated on Python 3.
    if ((version_info < (3, 0) and isinstance(aBuf, unicode)) or
            (version_info >= (3, 0) and not isinstance(aBuf, bytes))):
        raise ValueError('Expected a bytes object, not a unicode object')

    # Imported at call time rather than at module import.
    from . import universaldetector
    u = universaldetector.UniversalDetector()
    u.reset()
    u.feed(aBuf)
    u.close()
    return u.result
925 changes: 925 additions & 0 deletions includes/lib/chardet/big5freq.py

Large diffs are not rendered by default.

42 changes: 42 additions & 0 deletions includes/lib/chardet/big5prober.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is Mozilla Communicator client code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
# Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301 USA
######################### END LICENSE BLOCK #########################

from .mbcharsetprober import MultiByteCharSetProber
from .codingstatemachine import CodingStateMachine
from .chardistribution import Big5DistributionAnalysis
from .mbcssm import Big5SMModel


class Big5Prober(MultiByteCharSetProber):
    """Multi-byte charset prober configured for the Big5 encoding."""

    def __init__(self):
        """Attach the Big5 state machine and distribution analyser."""
        # Explicit base-class call (not super()) is kept from the original.
        MultiByteCharSetProber.__init__(self)
        state_machine = CodingStateMachine(Big5SMModel)
        self._mCodingSM = state_machine
        analyser = Big5DistributionAnalysis()
        self._mDistributionAnalyzer = analyser
        self.reset()

    def get_charset_name(self):
        """Return the name of the charset this prober recognises."""
        return "Big5"
80 changes: 80 additions & 0 deletions includes/lib/chardet/chardetect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env python
"""
Script which takes one or more file paths and reports on their detected
encodings
Example::
% chardetect somefile someotherfile
somefile: windows-1252 with confidence 0.5
someotherfile: ascii with confidence 1.0
If no paths are provided, it takes its input from stdin.
"""

from __future__ import absolute_import, print_function, unicode_literals

import argparse
import sys
from io import open

from chardet import __version__
from chardet.universaldetector import UniversalDetector


def description_of(lines, name='stdin'):
    """
    Describe the most likely encoding of a file or a list of byte strings.

    Every item of ``lines`` is fed to a ``UniversalDetector``; the returned
    text reports the detected encoding and its confidence, or states that
    there was no result.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    """
    detector = UniversalDetector()
    for chunk in lines:
        detector.feed(chunk)
    detector.close()

    guess = detector.result
    # Guard clause: nothing was detected.
    if not guess['encoding']:
        return '{0}: no result'.format(name)
    return '{0}: {1} with confidence {2}'.format(
        name, guess['encoding'], guess['confidence'])


def main(argv=None):
    '''
    Handles command line arguments and gets things started.

    Each input file is opened in binary mode and its detected encoding is
    printed to stdout. When no path is given, standard input is read.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    '''
    # description_of() expects an iterable of bytes. On Python 3 sys.stdin is
    # a text stream, so use its underlying binary buffer when it has one;
    # on Python 2 (no ``buffer`` attribute) sys.stdin is used as-is.
    default_input = getattr(sys.stdin, 'buffer', sys.stdin)

    # Get command line arguments
    parser = argparse.ArgumentParser(
        # Implicit concatenation instead of a backslash continuation, so
        # the source indentation cannot leak into the help text.
        description=("Takes one or more file paths and reports their "
                     "detected encodings"),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        conflict_handler='resolve')
    parser.add_argument('input',
                        help='File whose encoding we would like to determine.',
                        type=argparse.FileType('rb'), nargs='*',
                        default=[default_input])
    parser.add_argument('--version', action='version',
                        version='%(prog)s {0}'.format(__version__))
    args = parser.parse_args(argv)

    for f in args.input:
        if f.isatty():
            # Reading from a terminal: tell the user how to end the input.
            print("You are running chardetect interactively. Press " +
                  "CTRL-D twice at the start of a blank line to signal the " +
                  "end of your input. If you want help, run chardetect " +
                  "--help\n", file=sys.stderr)
        print(description_of(f, f.name))


if __name__ == '__main__':
    main()
Loading

0 comments on commit 85bf726

Please sign in to comment.