diff --git a/.gitignore b/.gitignore index 1d1c993c8e..6e12af5603 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.pyc .cache/ -.idea/ \ No newline at end of file +.idea/ +*.egg-info/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000..2c72a38ce9 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,10 @@ +repos: +- repo: https://github.com/ambv/black + rev: cad4138050b86d1c8570b926883e32f7465c2880 + hooks: + - id: black + language_version: python3.7 +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.3.0 + hooks: + - id: flake8 diff --git a/.travis.yml b/.travis.yml index 9022610059..c62b9956e7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,6 +22,8 @@ install: script: - GCSFS_RECORD_MODE=none py.test -vv gcsfs + - pip install flake8 ; flake8 gcsfs + - pip install git+https://github.com/psf/black@cad4138050b86d1c8570b926883e32f7465c2880; black gcsfs --check notifications: email: false diff --git a/docs/source/conf.py b/docs/source/conf.py index d189694a34..a0eacc74e2 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -19,43 +19,43 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) +# sys.path.insert(0, os.path.abspath('.')) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.todo', - 'sphinx.ext.ifconfig', - 'sphinx.ext.viewcode', - 'sphinx.ext.autosummary', - 'numpydoc', + "sphinx.ext.autodoc", + "sphinx.ext.todo", + "sphinx.ext.ifconfig", + "sphinx.ext.viewcode", + "sphinx.ext.autosummary", + "numpydoc", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'GCSFs' -copyright = '2017, Continuum Analytics' -author = 'Continuum Analytics' +project = "GCSFs" +copyright = "2017, Continuum Analytics" +author = "Continuum Analytics" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -63,6 +63,7 @@ # # The short X.Y version. import gcsfs + version = gcsfs.__version__ # The full version, including alpha/beta/rc tags. release = version @@ -76,9 +77,9 @@ # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. 
@@ -86,27 +87,27 @@ # The reST default role (used for this markup: `text`) to use for all # documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False @@ -116,161 +117,155 @@ # Taken from docs.readthedocs.io: # on_rtd is whether we are on readthedocs.io -on_rtd = os.environ.get('READTHEDOCS', None) == 'True' +on_rtd = os.environ.get("READTHEDOCS", None) == "True" if not on_rtd: # only import and set the theme if we're building docs locally import sphinx_rtd_theme - html_theme = 'sphinx_rtd_theme' + + html_theme = "sphinx_rtd_theme" html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. -#html_extra_path = [] +# html_extra_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. 
-#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' -#html_search_language = 'en' +# html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # Now only 'ja' uses this config value -#html_search_options = {'type': 'default'} +# html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. -#html_search_scorer = 'scorer.js' +# html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. -htmlhelp_basename = 'GCSFSdoc' +htmlhelp_basename = "GCSFSdoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', - -# Latex figure (float) alignment -#'figure_align': 'htbp', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', + # Latex figure (float) alignment + #'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'GCSFs.tex', 'GCSFs Documentation', - 'Continuum Analytics', 'manual'), + (master_doc, "GCSFs.tex", "GCSFs Documentation", "Continuum Analytics", "manual") ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. 
-#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'gcsfs', 'GCSFs Documentation', - [author], 1) -] +man_pages = [(master_doc, "gcsfs", "GCSFs Documentation", [author], 1)] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------- @@ -279,19 +274,25 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'GCSFs', 'GCSFs Documentation', - author, 'GCSFs', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "GCSFs", + "GCSFs Documentation", + author, + "GCSFs", + "One line description of project.", + "Miscellaneous", + ) ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False diff --git a/gcsfs/__init__.py b/gcsfs/__init__.py index 0e850659ef..82b7be0c3e 100644 --- a/gcsfs/__init__.py +++ b/gcsfs/__init__.py @@ -1,8 +1,8 @@ -from __future__ import absolute_import - from .core import GCSFileSystem from .mapping import GCSMap from ._version import get_versions -__version__ = get_versions()['version'] +__version__ = get_versions()["version"] del get_versions + +__all__ = ["GCSFileSystem", "GCSMap"] diff --git a/gcsfs/_version.py b/gcsfs/_version.py index 6297f24df7..df261c5fdf 100644 --- a/gcsfs/_version.py +++ b/gcsfs/_version.py @@ -1,4 +1,3 @@ - # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). 
Distribution tarballs (built by setup.py sdist) and build @@ -58,17 +57,18 @@ class NotThisMethod(Exception): def register_vcs_handler(vcs, method): # decorator """Decorator to mark a method as the handler for a particular VCS.""" + def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f + return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) p = None @@ -76,10 +76,13 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + p = subprocess.Popen( + [c] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) break except EnvironmentError: e = sys.exc_info()[1] @@ -116,16 +119,22 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): for i in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } else: rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @@ -181,7 +190,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -190,7 +199,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = set([r for r in refs if re.search(r"\d", r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -198,19 +207,26 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] + r = ref[len(tag_prefix) :] if verbose: print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } @register_vcs_handler("git", "pieces_from_vcs") @@ -225,8 +241,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -234,10 +249,19 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) + describe_out, rc = run_command( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -260,17 +284,16 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] + git_describe = git_describe[: git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces # tag @@ -279,10 +302,12 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] + pieces["closest-tag"] = full_tag[len(tag_prefix) :] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) @@ -293,13 +318,13 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], - cwd=root)[0].strip() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ + 0 + ].strip() pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces @@ -330,8 +355,7 @@ def render_pep440(pieces): rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered @@ -445,11 +469,13 @@ def render_git_describe_long(pieces): def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } if not style or style == "default": style = "pep440" # the default @@ -469,9 +495,13 @@ def render(pieces, style): else: raise ValueError("unknown style '%s'" % style) - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } def get_versions(): @@ -485,8 +515,7 @@ def get_versions(): verbose = cfg.verbose try: - return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, - verbose) + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass @@ -495,13 +524,16 @@ def get_versions(): # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. 
- for i in cfg.versionfile_source.split('/'): + for i in cfg.versionfile_source.split("/"): root = os.path.dirname(root) except NameError: - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to find root of source tree", - "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None, + } try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) @@ -515,6 +547,10 @@ def get_versions(): except NotThisMethod: pass - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, - "error": "unable to compute version", "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } diff --git a/gcsfs/cli/gcsfuse.py b/gcsfs/cli/gcsfuse.py index 06f69f4940..2058ac39d5 100644 --- a/gcsfs/cli/gcsfuse.py +++ b/gcsfs/cli/gcsfuse.py @@ -6,23 +6,39 @@ @click.command() -@click.argument('bucket', type=str, required=True) -@click.argument('mount_point', type=str, required=True) -@click.option('--token', type=str, required=False, default=None, - help="Token to use for authentication") -@click.option('--project-id', type=str, required=False, default='', - help="Billing Project ID") -@click.option('--foreground/--background', default=True, - help="Run in the foreground or as a background process") -@click.option('--threads/--no-threads', default=True, - help="Whether to run with threads") -@click.option('--cache_files', type=int, default=10, - help="Number of open files to cache") -@click.option('-v', '--verbose', count=True, - help="Set logging level. '-v' for 'gcsfuse' logging." - "'-v -v' for complete debug logging.") -def main(bucket, mount_point, token, project_id, foreground, threads, - cache_files, verbose): +@click.argument("bucket", type=str, required=True) +@click.argument("mount_point", type=str, required=True) +@click.option( + "--token", + type=str, + required=False, + default=None, + help="Token to use for authentication", +) +@click.option( + "--project-id", type=str, required=False, default="", help="Billing Project ID" +) +@click.option( + "--foreground/--background", + default=True, + help="Run in the foreground or as a background process", +) +@click.option( + "--threads/--no-threads", default=True, help="Whether to run with threads" +) +@click.option( + "--cache_files", type=int, default=10, help="Number of open files to cache" +) +@click.option( + "-v", + "--verbose", + count=True, + help="Set logging level. '-v' for 'gcsfuse' logging." 
+ "'-v -v' for complete debug logging.", +) +def main( + bucket, mount_point, token, project_id, foreground, threads, cache_files, verbose +): """ Mount a Google Cloud Storage (GCS) bucket to a local directory """ if verbose == 1: @@ -31,7 +47,7 @@ def main(bucket, mount_point, token, project_id, foreground, threads, if verbose > 1: logging.basicConfig(level=logging.DEBUG) - fmt = '%(asctime)s %(name)-12s %(levelname)-8s %(message)s' + fmt = "%(asctime)s %(name)-12s %(levelname)-8s %(message)s" if verbose == 1: logging.basicConfig(level=logging.INFO, format=fmt) logging.getLogger("gcsfs.gcsfuse").setLevel(logging.DEBUG) @@ -39,10 +55,14 @@ def main(bucket, mount_point, token, project_id, foreground, threads, logging.basicConfig(level=logging.DEBUG, format=fmt) print("Mounting bucket %s to directory %s" % (bucket, mount_point)) - print('foreground:', foreground, ', nothreads:', not threads) - FUSE(GCSFS(bucket, token=token, project=project_id, nfiles=cache_files), - mount_point, nothreads=not threads, foreground=foreground) + print("foreground:", foreground, ", nothreads:", not threads) + FUSE( + GCSFS(bucket, token=token, project=project_id, nfiles=cache_files), + mount_point, + nothreads=not threads, + foreground=foreground, + ) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/gcsfs/core.py b/gcsfs/core.py index c65ff78c51..a55a9ac087 100644 --- a/gcsfs/core.py +++ b/gcsfs/core.py @@ -6,7 +6,6 @@ import decorator -import array from base64 import b64encode, b64decode import google.auth as gauth import google.auth.compute_engine @@ -32,7 +31,7 @@ import random from requests.exceptions import RequestException, ProxyError -from .utils import HttpError, RateLimitException, is_retriable, read_block +from .utils import HttpError, RateLimitException, is_retriable PY2 = sys.version_info.major == 2 @@ -46,7 +45,7 @@ @decorator.decorator def _tracemethod(f, self, *args, **kwargs): logger.debug("%s(args=%s, kwargs=%s)", f.__name__, args, kwargs) - if _TRACE_METHOD_INVOCATIONS and logger.isEnabledFor(logging.DEBUG-1): + if _TRACE_METHOD_INVOCATIONS and logger.isEnabledFor(logging.DEBUG - 1): tb_io = io.StringIO() traceback.print_stack(file=tb_io) logger.log(logging.DEBUG - 1, tb_io.getvalue()) @@ -55,21 +54,36 @@ def _tracemethod(f, self, *args, **kwargs): # client created 2018-01-16 -not_secret = {"client_id": "586241054156-9kst7ltfj66svc342pcn43vp6ta3idin" - ".apps.googleusercontent.com", - "client_secret": "xto0LIFYX35mmHF9T1R2QBqT"} -client_config = {'installed': { - 'client_id': not_secret['client_id'], - 'client_secret': not_secret['client_secret'], - "auth_uri": "https://accounts.google.com/o/oauth2/auth", - "token_uri": "https://accounts.google.com/o/oauth2/token" -}} -tfile = os.path.join(os.path.expanduser("~"), '.gcs_tokens') -ACLs = {"authenticatedread", "bucketownerfullcontrol", "bucketownerread", - "private", "projectprivate", "publicread"} -bACLs = {"authenticatedRead", "private", "projectPrivate", "publicRead", - "publicReadWrite"} -DEFAULT_PROJECT = os.environ.get('GCSFS_DEFAULT_PROJECT', '') +not_secret = { + "client_id": "586241054156-9kst7ltfj66svc342pcn43vp6ta3idin" + ".apps.googleusercontent.com", + "client_secret": "xto0LIFYX35mmHF9T1R2QBqT", +} +client_config = { + "installed": { + "client_id": not_secret["client_id"], + "client_secret": not_secret["client_secret"], + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://accounts.google.com/o/oauth2/token", + } +} +tfile = os.path.join(os.path.expanduser("~"), ".gcs_tokens") 
+ACLs = { + "authenticatedread", + "bucketownerfullcontrol", + "bucketownerread", + "private", + "projectprivate", + "publicread", +} +bACLs = { + "authenticatedRead", + "private", + "projectPrivate", + "publicRead", + "publicReadWrite", +} +DEFAULT_PROJECT = os.environ.get("GCSFS_DEFAULT_PROJECT", "") GCS_MIN_BLOCK_SIZE = 2 ** 18 DEFAULT_BLOCK_SIZE = 5 * 2 ** 20 @@ -90,8 +104,8 @@ def quote_plus(s): ------- corrected URL """ - s = s.replace('/', '%2F') - s = s.replace(' ', '%20') + s = s.replace("/", "%2F") + s = s.replace(" ", "%20") return s @@ -123,16 +137,16 @@ def split_path(path): >>> split_path("gs://mybucket") ['mybucket', ''] """ - if path.startswith('gcs://'): + if path.startswith("gcs://"): path = path[6:] - if path.startswith('gs://'): + if path.startswith("gs://"): path = path[5:] - if path.startswith('/'): + if path.startswith("/"): path = path[1:] - if '/' not in path: + if "/" not in path: return path, "" else: - return path.split('/', 1) + return path.split("/", 1) def validate_response(r, path): @@ -148,9 +162,10 @@ def validate_response(r, path): m = str(r.content) error = None try: - error = r.json()['error'] - msg = error['message'] - except: + error = r.json()["error"] + msg = error["message"] + except: # noqa: E722 + # TODO: limit to appropriate exceptions msg = str(r.content) if r.status_code == 404: @@ -170,7 +185,7 @@ def validate_response(r, path): class GCSFileSystem(fsspec.AbstractFileSystem): - """ + r""" Connect to Google Cloud Storage. The following modes of authentication are supported: @@ -264,22 +279,32 @@ class GCSFileSystem(fsspec.AbstractFileSystem): this parameter to True will ensure that an actual operation is attempted before deciding that credentials are valid. """ - scopes = {'read_only', 'read_write', 'full_control'} + + scopes = {"read_only", "read_write", "full_control"} retries = 6 # number of retries on http failure base = "https://www.googleapis.com/storage/v1/" default_block_size = DEFAULT_BLOCK_SIZE - protocol = 'gcs', 'gs' - - def __init__(self, project=DEFAULT_PROJECT, access='full_control', - token=None, block_size=None, consistency='none', - cache_timeout=None, secure_serialize=True, - check_connection=False, requests_timeout=None, **kwargs): + protocol = "gcs", "gs" + + def __init__( + self, + project=DEFAULT_PROJECT, + access="full_control", + token=None, + block_size=None, + consistency="none", + cache_timeout=None, + secure_serialize=True, + check_connection=False, + requests_timeout=None, + **kwargs + ): if self._cached: return if access not in self.scopes: - raise ValueError('access must be one of {}', self.scopes) + raise ValueError("access must be one of {}", self.scopes) if project is None: - warnings.warn('GCS project not set - cannot list or create buckets') + warnings.warn("GCS project not set - cannot list or create buckets") if block_size is not None: self.default_block_size = block_size self.project = project @@ -301,12 +326,13 @@ def __init__(self, project=DEFAULT_PROJECT, access='full_control', def load_tokens(): """Get "browser" tokens from disc""" try: - with open(tfile, 'rb') as f: + with open(tfile, "rb") as f: tokens = pickle.load(f) # backwards compatability - tokens = {k: (GCSFileSystem._dict_to_credentials(v) - if isinstance(v, dict) else v) - for k, v in tokens.items()} + tokens = { + k: (GCSFileSystem._dict_to_credentials(v) if isinstance(v, dict) else v) + for k, v in tokens.items() + } except Exception: tokens = {} GCSFileSystem.tokens = tokens @@ -334,14 +360,17 @@ def _dict_to_credentials(self, token): 
""" try: token = service_account.Credentials.from_service_account_info( - token, scopes=[self.scope]) - except: + token, scopes=[self.scope] + ) + except: # noqa: E722 + # TODO: catch specific exceptions token = Credentials( - None, refresh_token=token['refresh_token'], - client_secret=token['client_secret'], - client_id=token['client_id'], - token_uri='https://www.googleapis.com/oauth2/v4/token', - scopes=[self.scope] + None, + refresh_token=token["refresh_token"], + client_secret=token["client_secret"], + client_id=token["client_id"], + token_uri="https://www.googleapis.com/oauth2/v4/token", + scopes=[self.scope], ) return token @@ -362,7 +391,8 @@ def _connect_token(self, token): # is this a "service" token? self._connect_service(token) return - except: + except: # noqa: E722 + # TODO: catch specific exceptions # some other kind of token file # will raise exception if is not json token = json.load(open(token)) @@ -371,13 +401,14 @@ def _connect_token(self, token): elif isinstance(token, google.auth.credentials.Credentials): credentials = token else: - raise ValueError('Token format not understood') + raise ValueError("Token format not understood") self.session = AuthorizedSession(credentials) def _connect_service(self, fn): # raises exception if file does not match expectation credentials = service_account.Credentials.from_service_account_file( - fn, scopes=[self.scope]) + fn, scopes=[self.scope] + ) self.session = AuthorizedSession(credentials) def _connect_anon(self): @@ -401,53 +432,65 @@ def connect(self, method=None): Type of authorisation to implement - calls `_connect_*` methods. If None, will try sequence of methods. """ - if method not in ['google_default', 'cache', 'cloud', 'token', 'anon', - 'browser', None]: + if method not in [ + "google_default", + "cache", + "cloud", + "token", + "anon", + "browser", + None, + ]: self._connect_token(method) elif method is None: - for meth in ['google_default', 'cache', 'anon']: + for meth in ["google_default", "cache", "anon"]: try: self.connect(method=meth) - if self.check_credentials and meth != 'anon': - self.ls('anaconda-public-data') - except: + if self.check_credentials and meth != "anon": + self.ls("anaconda-public-data") + except: # noqa: E722 + # TODO: catch specific exceptions self.session = None logger.debug('Connection with method "%s" failed' % meth) if self.session: break else: - self.__getattribute__('_connect_' + method)() + self.__getattribute__("_connect_" + method)() self.method = method if self.session is None: if method is None: - msg = ("Automatic authentication failed, you should try " - "specifying a method with the token= kwarg") + msg = ( + "Automatic authentication failed, you should try " + "specifying a method with the token= kwarg" + ) else: - msg = ("Auth failed with method '%s'. See the docstrings for " - "further details about your auth mechanism, also " - "available at https://gcsfs.readthedocs.io/en/latest/" - "api.html#gcsfs.core.GCSFileSystem" % method) + msg = ( + "Auth failed with method '%s'. 
See the docstrings for " + "further details about your auth mechanism, also " + "available at https://gcsfs.readthedocs.io/en/latest/" + "api.html#gcsfs.core.GCSFileSystem" % method + ) raise RuntimeError(msg) @staticmethod def _save_tokens(): try: - with open(tfile, 'wb') as f: + with open(tfile, "wb") as f: pickle.dump(GCSFileSystem.tokens, f, 2) except Exception as e: - warnings.warn('Saving token cache failed: ' + str(e)) + warnings.warn("Saving token cache failed: " + str(e)) @_tracemethod def _call(self, method, path, *args, **kwargs): for k, v in list(kwargs.items()): if v is None: del kwargs[k] - json = kwargs.pop('json', None) - headers = kwargs.pop('headers', None) - data = kwargs.pop('data', None) + json = kwargs.pop("json", None) + headers = kwargs.pop("headers", None) + data = kwargs.pop("data", None) r = None - if not path.startswith('http'): + if not path.startswith("http"): path = self.base + path if args: @@ -456,12 +499,24 @@ def _call(self, method, path, *args, **kwargs): for retry in range(self.retries): try: if retry > 0: - time.sleep(min(random.random() + 2**(retry-1), 32)) - r = self.session.request(method, path, - params=kwargs, json=json, headers=headers, data=data, timeout=self.requests_timeout) + time.sleep(min(random.random() + 2 ** (retry - 1), 32)) + r = self.session.request( + method, + path, + params=kwargs, + json=json, + headers=headers, + data=data, + timeout=self.requests_timeout, + ) validate_response(r, path) break - except (HttpError, RequestException, RateLimitException, GoogleAuthError) as e: + except ( + HttpError, + RequestException, + RateLimitException, + GoogleAuthError, + ) as e: if retry == self.retries - 1: logger.exception("_call out of retries on exception: %s", e) raise e @@ -491,7 +546,7 @@ def _process_object(bucket, object_metadata): result = dict(object_metadata) result["size"] = int(object_metadata.get("size", 0)) result["name"] = posixpath.join(bucket, object_metadata["name"]) - result['type'] = 'file' + result["type"] = "file" return result @@ -517,7 +572,9 @@ def _get_object(self, path): # listing. 
raise FileNotFoundError(path) - result = self._process_object(bucket, self._call('GET', 'b/{}/o/{}', bucket, key).json()) + result = self._process_object( + bucket, self._call("GET", "b/{}/o/{}", bucket, key).json() + ) return result @@ -531,7 +588,10 @@ def _maybe_get_cached_listing(self, path): logger.debug( "expired cache path: %s retrieved_time: %.3f cache_age: " "%.3f cache_timeout: %.3f", - path, retrieved_time, cache_age, self.cache_timeout + path, + retrieved_time, + cache_age, + self.cache_timeout, ) del self._listing_cache[path] return None @@ -563,33 +623,47 @@ def _do_list_objects(self, path, max_results=None): prefixes = [] items = [] - page = self._call('GET', 'b/{}/o/', bucket, - delimiter="/", prefix=prefix, maxResults=max_results - ).json() + page = self._call( + "GET", + "b/{}/o/", + bucket, + delimiter="/", + prefix=prefix, + maxResults=max_results, + ).json() assert page["kind"] == "storage#objects" prefixes.extend(page.get("prefixes", [])) - items.extend([i for i in page.get("items", []) - if prefix is None - or i['name'].rstrip('/') == prefix.rstrip('/') - or i['name'].startswith(prefix.rstrip('/') + '/')]) - next_page_token = page.get('nextPageToken', None) + items.extend( + [ + i + for i in page.get("items", []) + if prefix is None + or i["name"].rstrip("/") == prefix.rstrip("/") + or i["name"].startswith(prefix.rstrip("/") + "/") + ] + ) + next_page_token = page.get("nextPageToken", None) while next_page_token is not None: - page = self._call('GET', 'b/{}/o/', bucket, - delimiter="/", prefix=prefix, - maxResults=max_results, pageToken=next_page_token - ).json() + page = self._call( + "GET", + "b/{}/o/", + bucket, + delimiter="/", + prefix=prefix, + maxResults=max_results, + pageToken=next_page_token, + ).json() assert page["kind"] == "storage#objects" prefixes.extend(page.get("prefixes", [])) - items.extend([ - i for i in page.get("items", []) - ]) - next_page_token = page.get('nextPageToken', None) + items.extend([i for i in page.get("items", [])]) + next_page_token = page.get("nextPageToken", None) - prefixes = [p for p in prefixes - if prefix is None or prefix.rstrip('/') + '/' in p] + prefixes = [ + p for p in prefixes if prefix is None or prefix.rstrip("/") + "/" in p + ] result = { "kind": "storage#objects", "prefixes": prefixes, @@ -601,23 +675,24 @@ def _do_list_objects(self, path, max_results=None): def _list_buckets(self): """Return list of all buckets under the current project.""" items = [] - page = self._call('GET', 'b/', - project=self.project).json() + page = self._call("GET", "b/", project=self.project).json() assert page["kind"] == "storage#buckets" items.extend(page.get("items", [])) - next_page_token = page.get('nextPageToken', None) + next_page_token = page.get("nextPageToken", None) while next_page_token is not None: page = self._call( - 'GET', 'b/', project=self.project, pageToken=next_page_token).json() + "GET", "b/", project=self.project, pageToken=next_page_token + ).json() assert page["kind"] == "storage#buckets" items.extend(page.get("items", [])) - next_page_token = page.get('nextPageToken', None) + next_page_token = page.get("nextPageToken", None) - return [{'name': i['name'] + '/', 'size': 0, 'type': "directory"} - for i in items] + return [ + {"name": i["name"] + "/", "size": 0, "type": "directory"} for i in items + ] @_tracemethod def invalidate_cache(self, path=None): @@ -636,15 +711,13 @@ def invalidate_cache(self, path=None): else: path = norm_path(path) - invalid_keys = [k for k in self._listing_cache - if k.startswith(path)] + 
invalid_keys = [k for k in self._listing_cache if k.startswith(path)] for k in invalid_keys: self._listing_cache.pop(k, None) @_tracemethod - def mkdir(self, bucket, acl='projectPrivate', - default_acl='bucketOwnerFullControl'): + def mkdir(self, bucket, acl="projectPrivate", default_acl="bucketOwnerFullControl"): """ New bucket @@ -658,13 +731,18 @@ def mkdir(self, bucket, acl='projectPrivate', default_acl: str, one of ACLs default ACL for objects created in this bucket """ - if bucket in ['', '/']: - raise ValueError('Cannot create root bucket') - if '/' in bucket: + if bucket in ["", "/"]: + raise ValueError("Cannot create root bucket") + if "/" in bucket: return - self._call('post', 'b/', predefinedAcl=acl, project=self.project, - predefinedDefaultObjectAcl=default_acl, - json={"name": bucket}) + self._call( + "post", + "b/", + predefinedAcl=acl, + project=self.project, + predefinedDefaultObjectAcl=default_acl, + json={"name": bucket}, + ) self.invalidate_cache(bucket) @_tracemethod @@ -677,9 +755,9 @@ def rmdir(self, bucket): bucket name. If contains '/' (i.e., looks like subdir), will have no effect because GCS doesn't have real directories. """ - if '/' in bucket: + if "/" in bucket: return - self._call('delete', 'b/' + bucket) + self._call("delete", "b/" + bucket) self.invalidate_cache(bucket) def info(self, path, **kwargs): @@ -689,17 +767,23 @@ def info(self, path, **kwargs): parent_path = norm_path(self._parent(path)).rstrip("/") parent_cache = self._maybe_get_cached_listing(parent_path + "/") if parent_cache: - for o in parent_cache['items']: - if o['name'].rstrip("/") == path: + for o in parent_cache["items"]: + if o["name"].rstrip("/") == path: return o # Check exact file path - out = [o for o in self.ls(path, detail=True, **kwargs) - if o['name'].rstrip("/") == path] + out = [ + o + for o in self.ls(path, detail=True, **kwargs) + if o["name"].rstrip("/") == path + ] if out: return out[0] # Check parent path - out = [o for o in self.ls(parent_path, detail=True, **kwargs) - if o['name'].rstrip("/") == path] + out = [ + o + for o in self.ls(parent_path, detail=True, **kwargs) + if o["name"].rstrip("/") == path + ] if out: return out[0] else: @@ -710,7 +794,7 @@ def ls(self, path, detail=False): """List objects under the given '/{bucket}/{prefix} path.""" path = norm_path(path) - if path in ['/', '']: + if path in ["/", ""]: if detail: return self._list_buckets() else: @@ -718,11 +802,9 @@ def ls(self, path, detail=False): elif path.endswith("/"): return self._ls(path, detail) else: - combined_listing = self._ls(path, detail) + self._ls(path + "/", - detail) + combined_listing = self._ls(path, detail) + self._ls(path + "/", detail) if detail: - combined_entries = dict( - (l["name"], l) for l in combined_listing) + combined_entries = dict((l["name"], l) for l in combined_listing) combined_entries.pop(path + "/", None) return list(combined_entries.values()) else: @@ -735,13 +817,14 @@ def _ls(self, path, detail=False): item_details = listing["items"] - pseudodirs = [{ - 'bucket': bucket, - 'name': bucket + "/" + prefix, - 'kind': 'storage#object', - 'size': 0, - 'storageClass': 'DIRECTORY', - 'type': 'directory' + pseudodirs = [ + { + "bucket": bucket, + "name": bucket + "/" + prefix, + "kind": "storage#object", + "size": 0, + "storageClass": "DIRECTORY", + "type": "directory", } for prefix in listing["prefixes"] ] @@ -749,12 +832,12 @@ def _ls(self, path, detail=False): if detail: return out else: - return sorted([o['name'] for o in out]) + return sorted([o["name"] for o in out]) 
@staticmethod def url(path): """ Get HTTP URL of the given path """ - u = 'https://www.googleapis.com/download/storage/v1/b/{}/o/{}?alt=media' + u = "https://www.googleapis.com/download/storage/v1/b/{}/o/{}?alt=media" bucket, object = split_path(path) object = quote_plus(object) return u.format(bucket, object) @@ -765,23 +848,22 @@ def cat(self, path): u2 = self.url(path) r = self.session.get(u2) r.raise_for_status() - if 'X-Goog-Hash' in r.headers: + if "X-Goog-Hash" in r.headers: # if header includes md5 hash, check that data matches - bits = r.headers['X-Goog-Hash'].split(',') + bits = r.headers["X-Goog-Hash"].split(",") for bit in bits: - key, val = bit.split('=', 1) - if key == 'md5': + key, val = bit.split("=", 1) + if key == "md5": md = b64decode(val) assert md5(r.content).digest() == md, "Checksum failure" return r.content def getxattr(self, path, attr): """Get user-defined metadata attribute""" - meta = self.info(path).get('metadata', {}) + meta = self.info(path).get("metadata", {}) return meta[attr] - def setxattrs(self, path, content_type=None, content_encoding=None, - **kwargs): + def setxattrs(self, path, content_type=None, content_encoding=None, **kwargs): """ Set/delete/add writable metadata attributes Parameters @@ -798,29 +880,36 @@ def setxattrs(self, path, content_type=None, content_encoding=None, ------- Entire metadata after update (even if only path is passed) """ - i_json = {'metadata': kwargs} + i_json = {"metadata": kwargs} if content_type is not None: - i_json['contentType'] = content_type + i_json["contentType"] = content_type if content_encoding is not None: - i_json['contentEncoding'] = content_encoding + i_json["contentEncoding"] = content_encoding bucket, key = split_path(path) - o_json = self._call('PATCH', "b/{}/o/{}", bucket, key, - fields='metadata', json=i_json - ).json() - self.info(path)['metadata'] = o_json.get('metadata', {}) - return o_json.get('metadata', {}) + o_json = self._call( + "PATCH", "b/{}/o/{}", bucket, key, fields="metadata", json=i_json + ).json() + self.info(path)["metadata"] = o_json.get("metadata", {}) + return o_json.get("metadata", {}) @_tracemethod def merge(self, path, paths, acl=None): """Concatenate objects within a single bucket""" bucket, key = split_path(path) - source = [{'name': split_path(p)[1]} for p in paths] - self._call('POST', 'b/{}/o/{}/compose', bucket, key, - destinationPredefinedAcl=acl, - json={'sourceObjects': source, - "kind": "storage#composeRequest", - 'destination': {'name': key, 'bucket': bucket}}) + source = [{"name": split_path(p)[1]} for p in paths] + self._call( + "POST", + "b/{}/o/{}/compose", + bucket, + key, + destinationPredefinedAcl=acl, + json={ + "sourceObjects": source, + "kind": "storage#composeRequest", + "destination": {"name": key, "bucket": bucket}, + }, + ) @_tracemethod def copy(self, path1, path2, acl=None): @@ -828,12 +917,26 @@ def copy(self, path1, path2, acl=None): """ b1, k1 = split_path(path1) b2, k2 = split_path(path2) - out = self._call('POST', 'b/{}/o/{}/rewriteTo/b/{}/o/{}', b1, k1, b2, k2, - destinationPredefinedAcl=acl).json() - while out['done'] is not True: + out = self._call( + "POST", + "b/{}/o/{}/rewriteTo/b/{}/o/{}", + b1, + k1, + b2, + k2, + destinationPredefinedAcl=acl, + ).json() + while out["done"] is not True: out = self._call( - 'POST', 'b/{}/o/{}/rewriteTo/b/{}/o/{}', b1, k1, b2, k2, - rewriteToken=out['rewriteToken'], destinationPredefinedAcl=acl).json() + "POST", + "b/{}/o/{}/rewriteTo/b/{}/o/{}", + b1, + k1, + b2, + k2, + rewriteToken=out["rewriteToken"], + 
destinationPredefinedAcl=acl, + ).json() @_tracemethod def rm(self, path, recursive=False): @@ -846,38 +949,61 @@ def rm(self, path, recursive=False): If recursive, delete all keys given by find(path) """ if isinstance(path, (tuple, list)): - template = ('\n--===============7330845974216740156==\n' - 'Content-Type: application/http\n' - 'Content-Transfer-Encoding: binary\n' - 'Content-ID: ' - '\n\nDELETE /storage/v1/b/{bucket}/o/{key} HTTP/1.1\n' - 'Content-Type: application/json\n' - 'accept: application/json\ncontent-length: 0\n') - body = "".join([template.format(i=i+1, bucket=p.split('/', 1)[0], - key=quote_plus(p.split('/', 1)[1])) - for i, p in enumerate(path)]) + template = ( + "\n--===============7330845974216740156==\n" + "Content-Type: application/http\n" + "Content-Transfer-Encoding: binary\n" + "Content-ID: " + "\n\nDELETE /storage/v1/b/{bucket}/o/{key} HTTP/1.1\n" + "Content-Type: application/json\n" + "accept: application/json\ncontent-length: 0\n" + ) + body = "".join( + [ + template.format( + i=i + 1, + bucket=p.split("/", 1)[0], + key=quote_plus(p.split("/", 1)[1]), + ) + for i, p in enumerate(path) + ] + ) r = self._call( - 'POST', 'https://www.googleapis.com/batch', - headers={'Content-Type': 'multipart/mixed; boundary="==========' - '=====7330845974216740156=="'}, - data=body + "\n--===============7330845974216740156==--") + "POST", + "https://www.googleapis.com/batch", + headers={ + "Content-Type": 'multipart/mixed; boundary="==========' + '=====7330845974216740156=="' + }, + data=body + "\n--===============7330845974216740156==--", + ) - boundary = r.headers['Content-Type'].split('=', 1)[1] + boundary = r.headers["Content-Type"].split("=", 1)[1] parents = {posixpath.dirname(norm_path(p)) for p in path} [self.invalidate_cache(parent) for parent in parents] - return ['200 OK' in c or '204 No Content' in c for c in - r.text.split(boundary)][1:-1] + return [ + "200 OK" in c or "204 No Content" in c for c in r.text.split(boundary) + ][1:-1] elif recursive: return self.rm(self.find(path)) else: bucket, key = split_path(path) - self._call('DELETE', "b/{}/o/{}", bucket, key) + self._call("DELETE", "b/{}/o/{}", bucket, key) self.invalidate_cache(posixpath.dirname(norm_path(path))) return True @_tracemethod - def _open(self, path, mode='rb', block_size=None, acl=None, - consistency=None, metadata=None, autocommit=True, **kwargs): + def _open( + self, + path, + mode="rb", + block_size=None, + acl=None, + consistency=None, + metadata=None, + autocommit=True, + **kwargs + ): """ See ``GCSFile``. @@ -887,19 +1013,35 @@ def _open(self, path, mode='rb', block_size=None, acl=None, if block_size is None: block_size = self.default_block_size const = consistency or self.consistency - return GCSFile(self, path, mode, block_size, consistency=const, - metadata=metadata, acl=acl, autocommit=autocommit, - **kwargs) + return GCSFile( + self, + path, + mode, + block_size, + consistency=const, + metadata=metadata, + acl=acl, + autocommit=autocommit, + **kwargs + ) GCSFileSystem.load_tokens() class GCSFile(fsspec.spec.AbstractBufferedFile): - - def __init__(self, gcsfs, path, mode='rb', block_size=DEFAULT_BLOCK_SIZE, - acl=None, consistency='md5', metadata=None, - autocommit=True, **kwargs): + def __init__( + self, + gcsfs, + path, + mode="rb", + block_size=DEFAULT_BLOCK_SIZE, + acl=None, + consistency="md5", + metadata=None, + autocommit=True, + **kwargs + ): """ Open a file. 
@@ -924,11 +1066,10 @@ def __init__(self, gcsfs, path, mode='rb', block_size=DEFAULT_BLOCK_SIZE, metadata: dict Custom metadata, in key/value pairs, added at file creation """ - super().__init__(gcsfs, path, mode, block_size, autocommit=autocommit, - **kwargs) + super().__init__(gcsfs, path, mode, block_size, autocommit=autocommit, **kwargs) bucket, key = split_path(path) if not key: - raise OSError('Attempt to open a bucket') + raise OSError("Attempt to open a bucket") self.gcsfs = gcsfs self.bucket = bucket self.key = key @@ -936,11 +1077,11 @@ def __init__(self, gcsfs, path, mode='rb', block_size=DEFAULT_BLOCK_SIZE, self.acl = acl self.trim = True self.consistency = consistency - if self.consistency == 'md5': + if self.consistency == "md5": self.md5 = md5() - if mode == 'wb': + if mode == "wb": if self.blocksize < GCS_MIN_BLOCK_SIZE: - warnings.warn('Setting block size to minimum value, 2**18') + warnings.warn("Setting block size to minimum value, 2**18") self.blocksize = GCS_MIN_BLOCK_SIZE self.location = None @@ -950,7 +1091,7 @@ def info(self): def url(self): """ HTTP link to this file's data """ - return self.details['mediaLink'] + return self.details["mediaLink"] @_tracemethod def _upload_chunk(self, final=False): @@ -967,11 +1108,14 @@ def _upload_chunk(self, final=False): l = len(data) if final and self.autocommit: if l: - head['Content-Range'] = 'bytes %i-%i/%i' % ( - self.offset, self.offset + l - 1, self.offset + l) + head["Content-Range"] = "bytes %i-%i/%i" % ( + self.offset, + self.offset + l - 1, + self.offset + l, + ) else: # closing when buffer is empty - head['Content-Range'] = 'bytes */%i' % self.offset + head["Content-Range"] = "bytes */%i" % self.offset data = None else: if l < GCS_MIN_BLOCK_SIZE: @@ -979,34 +1123,36 @@ def _upload_chunk(self, final=False): return elif not final: raise ValueError("Non-final chunk write below min size.") - head['Content-Range'] = 'bytes %i-%i/*' % ( - self.offset, self.offset + l - 1) - head.update({'Content-Type': 'application/octet-stream', - 'Content-Length': str(l)}) - r = self.gcsfs._call('POST', self.location, - uploadType='resumable', headers=head, data=data) - if 'Range' in r.headers: - end = int(r.headers['Range'].split('-')[1]) + head["Content-Range"] = "bytes %i-%i/*" % (self.offset, self.offset + l - 1) + head.update( + {"Content-Type": "application/octet-stream", "Content-Length": str(l)} + ) + r = self.gcsfs._call( + "POST", self.location, uploadType="resumable", headers=head, data=data + ) + if "Range" in r.headers: + end = int(r.headers["Range"].split("-")[1]) shortfall = (self.offset + l - 1) - end if shortfall: - if self.consistency == 'md5': + if self.consistency == "md5": self.md5.update(data[:-shortfall]) self.buffer = io.BytesIO(data[-shortfall:]) self.buffer.seek(shortfall) self.offset += l - shortfall return False else: - if self.consistency == 'md5': + if self.consistency == "md5": self.md5.update(data) elif l: # assert final, "Response looks like upload is over" - size, md5 = int(r.json()['size']), r.json()['md5Hash'] - if self.consistency == 'size': + size, md5 = int(r.json()["size"]), r.json()["md5Hash"] + if self.consistency == "size": assert size == self.buffer.tell() + self.offset, "Size mismatch" - if self.consistency == 'md5': - assert b64encode( - self.md5.digest()) == md5.encode(), "MD5 checksum failed" + if self.consistency == "md5": + assert ( + b64encode(self.md5.digest()) == md5.encode() + ), "MD5 checksum failed" else: assert final, "Response looks like upload is over" return True @@ -1019,11 
+1165,14 @@ def commit(self): @_tracemethod def _initiate_upload(self): """ Create multi-upload """ - r = self.gcsfs._call('POST', 'https://www.googleapis.com/upload/storage' - '/v1/b/%s/o' % quote_plus(self.bucket), - uploadType='resumable', - json={'name': self.key, 'metadata': self.metadata}) - self.location = r.headers['Location'] + r = self.gcsfs._call( + "POST", + "https://www.googleapis.com/upload/storage" + "/v1/b/%s/o" % quote_plus(self.bucket), + uploadType="resumable", + json={"name": self.key, "metadata": self.metadata}, + ) + self.location = r.headers["Location"] @_tracemethod def discard(self): @@ -1033,12 +1182,13 @@ def discard(self): """ if self.location is None: return - uid = re.findall('upload_id=([^&=?]+)', self.location) + uid = re.findall("upload_id=([^&=?]+)", self.location) r = self.gcsfs._call( - 'DELETE', - 'https://www.googleapis.com/upload/storage/v1/b/%s/o' - '' % quote_plus(self.bucket), - params={'uploadType': 'resumable', 'upload_id': uid}) + "DELETE", + "https://www.googleapis.com/upload/storage/v1/b/%s/o" + "" % quote_plus(self.bucket), + params={"uploadType": "resumable", "upload_id": uid}, + ) r.raise_for_status() @_tracemethod @@ -1046,27 +1196,35 @@ def _simple_upload(self): """One-shot upload, less than 5MB""" self.buffer.seek(0) data = self.buffer.read() - path = ('https://www.googleapis.com/upload/storage/v1/b/%s/o' - % quote_plus(self.bucket)) - metadata = {'name': self.key} + path = "https://www.googleapis.com/upload/storage/v1/b/%s/o" % quote_plus( + self.bucket + ) + metadata = {"name": self.key} if self.metadata is not None: - metadata['metadata'] = self.metadata + metadata["metadata"] = self.metadata metadata = json.dumps(metadata) - data = ('--==0==' - '\nContent-Type: application/json; charset=UTF-8' - '\n\n' + metadata + - '\n--==0==' - '\nContent-Type: application/octet-stream' - '\n\n').encode() + data + b'\n--==0==--' + data = ( + ( + "--==0==" + "\nContent-Type: application/json; charset=UTF-8" + "\n\n" + metadata + "\n--==0==" + "\nContent-Type: application/octet-stream" + "\n\n" + ).encode() + + data + + b"\n--==0==--" + ) r = self.gcsfs._call( - 'POST', path, - uploadType='multipart', - headers={'Content-Type': 'multipart/related; boundary="==0=="'}, - data=data) - size, md5 = int(r.json()['size']), r.json()['md5Hash'] - if self.consistency == 'size': + "POST", + path, + uploadType="multipart", + headers={"Content-Type": 'multipart/related; boundary="==0=="'}, + data=data, + ) + size, md5 = int(r.json()["size"]), r.json()["md5Hash"] + if self.consistency == "size": assert size == self.buffer.tell(), "Size mismatch" - if self.consistency == 'md5': + if self.consistency == "md5": self.md5.update(data) assert b64encode(self.md5.digest()) == md5.encode(), "MD5 checksum failed" @@ -1080,15 +1238,14 @@ def _fetch_range(self, start=None, end=None): if start is not None or end is not None: start = start or 0 end = end or 0 - head = {'Range': 'bytes=%i-%i' % (start, end - 1)} + head = {"Range": "bytes=%i-%i" % (start, end - 1)} else: head = None try: - r = self.gcsfs._call('GET', self.details['mediaLink'], - headers=head) + r = self.gcsfs._call("GET", self.details["mediaLink"], headers=head) data = r.content return data except RuntimeError as e: - if 'not satisfiable' in str(e): - return b'' + if "not satisfiable" in str(e): + return b"" raise diff --git a/gcsfs/dask_link.py b/gcsfs/dask_link.py index 6f726c85c5..f86f68a7a6 100644 --- a/gcsfs/dask_link.py +++ b/gcsfs/dask_link.py @@ -1,4 +1,3 @@ - def register(): """ Backward compatibility 
diff --git a/gcsfs/tests/conftest.py b/gcsfs/tests/conftest.py new file mode 100644 index 0000000000..373ec9f366 --- /dev/null +++ b/gcsfs/tests/conftest.py @@ -0,0 +1,14 @@ +import pytest + +from gcsfs.core import GCSFileSystem + + +@pytest.yield_fixture +def token_restore(): + cache = GCSFileSystem.tokens + try: + GCSFileSystem.tokens = {} + yield + finally: + GCSFileSystem.tokens = cache + GCSFileSystem._save_tokens() diff --git a/gcsfs/tests/settings.py b/gcsfs/tests/settings.py index 99e4f13d3d..83a7ca7721 100644 --- a/gcsfs/tests/settings.py +++ b/gcsfs/tests/settings.py @@ -2,25 +2,30 @@ import os import gcsfs.core -RECORD_MODE = os.environ.get('GCSFS_RECORD_MODE', 'none') -TEST_PROJECT = os.environ.get('GCSFS_TEST_PROJECT', 'test_project') +RECORD_MODE = os.environ.get("GCSFS_RECORD_MODE", "none") +TEST_PROJECT = os.environ.get("GCSFS_TEST_PROJECT", "test_project") -TEST_BUCKET = os.environ.get('GCSFS_TEST_BUCKET', 'gcsfs-testing') +TEST_BUCKET = os.environ.get("GCSFS_TEST_BUCKET", "gcsfs-testing") -FAKE_TOKEN = {'access_token': 'xxx', 'expires_in': 0, - 'grant_type': 'refresh_token', - 'refresh_token': 'xxx', 'timestamp': 1487859400.} +FAKE_TOKEN = { + "access_token": "xxx", + "expires_in": 0, + "grant_type": "refresh_token", + "refresh_token": "xxx", + "timestamp": 1487859400.0, +} FAKE_TOKEN.update(gcsfs.core.not_secret) FAKE_GOOGLE_TOKEN = { - "client_id": "764086051850-6qr4p6gpi6hn506pt8ejuq83di341hur." - "apps.googleusercontent.com", - "client_secret": "d-FL95Q19q7MQmFpd7hHD0Ty", - "refresh_token": "xxx", - "type": "authorized_user" + "client_id": ( + "764086051850-6qr4p6gpi6hn506pt8ejuq83di341hur." "apps.googleusercontent.com" + ), + "client_secret": "d-FL95Q19q7MQmFpd7hHD0Ty", + "refresh_token": "xxx", + "type": "authorized_user", } -GOOGLE_TOKEN = os.environ.get('GCSFS_GOOGLE_TOKEN', FAKE_GOOGLE_TOKEN) +GOOGLE_TOKEN = os.environ.get("GCSFS_GOOGLE_TOKEN", FAKE_GOOGLE_TOKEN) if isinstance(GOOGLE_TOKEN, str) and os.path.exists(GOOGLE_TOKEN): with open(GOOGLE_TOKEN) as f: @@ -28,4 +33,4 @@ # /Users/mdurant/.config/gcloud/application_default_credentials.json -DEBUG = os.environ.get('GCSFS_DEBUG', False) +DEBUG = os.environ.get("GCSFS_DEBUG", False) diff --git a/gcsfs/tests/test_core.py b/gcsfs/tests/test_core.py index 8abf3b92d8..324bf6ec43 100644 --- a/gcsfs/tests/test_core.py +++ b/gcsfs/tests/test_core.py @@ -2,27 +2,34 @@ import io from itertools import chain -import os import pytest from gcsfs.tests.settings import TEST_PROJECT, GOOGLE_TOKEN, TEST_BUCKET -from gcsfs.tests.utils import (tempdir, token_restore, my_vcr, gcs_maker, - files, csv_files, text_files, a, b, c, d, - tmpfile) -from gcsfs.core import GCSFileSystem, quote_plus, GCS_MIN_BLOCK_SIZE +from gcsfs.tests.utils import ( + tempdir, + my_vcr, + gcs_maker, + files, + csv_files, + text_files, + a, + b, + tmpfile, +) +from gcsfs.core import GCSFileSystem, quote_plus from gcsfs.utils import seek_delimiter -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_simple(token_restore): assert not GCSFileSystem.tokens gcs = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN) gcs.ls(TEST_BUCKET) # no error - gcs.ls('/' + TEST_BUCKET) # OK to lead with '/' + gcs.ls("/" + TEST_BUCKET) # OK to lead with '/' @pytest.mark.xfail(reason="should pass for real google, but not VCR") -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_many_connect(token_restore): from multiprocessing.pool import ThreadPool @@ -37,67 +44,69 @@ def task(i): pool.join() 
-@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_simple_upload(token_restore): with gcs_maker() as gcs: - fn = TEST_BUCKET + '/test' - with gcs.open(fn, 'wb') as f: - f.write(b'zz') - assert gcs.cat(fn) == b'zz' + fn = TEST_BUCKET + "/test" + with gcs.open(fn, "wb") as f: + f.write(b"zz") + assert gcs.cat(fn) == b"zz" -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_multi_upload(token_restore): with gcs_maker() as gcs: - fn = TEST_BUCKET + '/test' - d = b'01234567' * 2**15 + fn = TEST_BUCKET + "/test" + d = b"01234567" * 2 ** 15 # something to write on close - with gcs.open(fn, 'wb', block_size=2**18) as f: + with gcs.open(fn, "wb", block_size=2 ** 18) as f: f.write(d) - f.write(b'xx') - assert gcs.cat(fn) == d + b'xx' + f.write(b"xx") + assert gcs.cat(fn) == d + b"xx" # empty buffer on close - with gcs.open(fn, 'wb', block_size=2**19) as f: + with gcs.open(fn, "wb", block_size=2 ** 19) as f: f.write(d) - f.write(b'xx') + f.write(b"xx") f.write(d) - assert gcs.cat(fn) == d + b'xx' + d + assert gcs.cat(fn) == d + b"xx" + d -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_info(token_restore): with gcs_maker() as gcs: gcs.touch(a) assert gcs.info(a) == gcs.ls(a, detail=True)[0] -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_ls2(token_restore): with gcs_maker() as gcs: - assert TEST_BUCKET +'/' in gcs.ls('') + assert TEST_BUCKET + "/" in gcs.ls("") with pytest.raises((OSError, IOError)): - gcs.ls('nonexistent') - fn = TEST_BUCKET+'/test/accounts.1.json' + gcs.ls("nonexistent") + fn = TEST_BUCKET + "/test/accounts.1.json" gcs.touch(fn) - assert fn in gcs.ls(TEST_BUCKET+'/test') + assert fn in gcs.ls(TEST_BUCKET + "/test") -@my_vcr.use_cassette(match=['all']) + +@my_vcr.use_cassette(match=["all"]) def test_pickle(token_restore): import pickle + with gcs_maker() as gcs: # Write data to distinct filename - fn = TEST_BUCKET+'/nested/abcdefg' - with gcs.open(fn, 'wb') as f: - f.write(b'1234567') + fn = TEST_BUCKET + "/nested/abcdefg" + with gcs.open(fn, "wb") as f: + f.write(b"1234567") # verify that that filename is not in the serialized form b = pickle.dumps(gcs) - assert b'abcdefg' not in b - assert b'1234567' not in b - assert b'listing_cache' not in b + assert b"abcdefg" not in b + assert b"1234567" not in b + assert b"listing_cache" not in b gcs2 = pickle.loads(b) @@ -107,22 +116,22 @@ def test_pickle(token_restore): assert gcs.ls(TEST_BUCKET) == gcs2.ls(TEST_BUCKET) -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_ls_touch(token_restore): with gcs_maker() as gcs: - assert not gcs.exists(TEST_BUCKET+'/tmp/test') + assert not gcs.exists(TEST_BUCKET + "/tmp/test") gcs.touch(a) gcs.touch(b) - L = gcs.ls(TEST_BUCKET+'/tmp/test', False) + L = gcs.ls(TEST_BUCKET + "/tmp/test", False) assert set(L) == set([a, b]) - L_d = gcs.ls(TEST_BUCKET+'/tmp/test', True) - assert set(d['name'] for d in L_d) == set([a, b]) + L_d = gcs.ls(TEST_BUCKET + "/tmp/test", True) + assert set(d["name"] for d in L_d) == set([a, b]) -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_rm(token_restore): with gcs_maker() as gcs: assert not gcs.exists(a) @@ -131,12 +140,12 @@ def test_rm(token_restore): gcs.rm(a) assert not gcs.exists(a) with pytest.raises((OSError, IOError)): - gcs.rm(TEST_BUCKET+'/nonexistent') + gcs.rm(TEST_BUCKET + "/nonexistent") with pytest.raises((OSError, IOError)): - gcs.rm('nonexistent') + 
gcs.rm("nonexistent") -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_rm_batch(token_restore): with gcs_maker() as gcs: gcs.touch(a) @@ -148,9 +157,9 @@ def test_rm_batch(token_restore): assert b not in gcs.find(TEST_BUCKET) -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_rm_recursive(token_restore): - files = ['/a', '/a/b', '/a/c'] + files = ["/a", "/a/b", "/a/c"] with gcs_maker() as gcs: for fn in files: gcs.touch(TEST_BUCKET + fn) @@ -158,12 +167,12 @@ def test_rm_recursive(token_restore): assert gcs.ls(TEST_BUCKET) == [] -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_file_access(token_restore): with gcs_maker() as gcs: - fn = TEST_BUCKET+'/nested/file1' - data = b'hello\n' - with gcs.open(fn, 'wb') as f: + fn = TEST_BUCKET + "/nested/file1" + data = b"hello\n" + with gcs.open(fn, "wb") as f: f.write(data) assert gcs.cat(fn) == data assert gcs.head(fn, 3) == data[:3] @@ -171,93 +180,99 @@ def test_file_access(token_restore): assert gcs.tail(fn, 10000) == data -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_file_info(token_restore): with gcs_maker() as gcs: - fn = TEST_BUCKET+'/nested/file1' - data = b'hello\n' - with gcs.open(fn, 'wb') as f: + fn = TEST_BUCKET + "/nested/file1" + data = b"hello\n" + with gcs.open(fn, "wb") as f: f.write(data) assert fn in gcs.find(TEST_BUCKET) assert gcs.exists(fn) - assert not gcs.exists(fn+'another') - assert gcs.info(fn)['size'] == len(data) + assert not gcs.exists(fn + "another") + assert gcs.info(fn)["size"] == len(data) with pytest.raises((OSError, IOError)): - gcs.info(fn+'another') + gcs.info(fn + "another") -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_du(token_restore): with gcs_maker(True) as gcs: d = gcs.du(TEST_BUCKET, total=False) assert all(isinstance(v, int) and v >= 0 for v in d.values()) - assert TEST_BUCKET+'/nested/file1' in d + assert TEST_BUCKET + "/nested/file1" in d - assert gcs.du(TEST_BUCKET + '/test/', total=True) == sum( - map(len, files.values())) + assert gcs.du(TEST_BUCKET + "/test/", total=True) == sum( + map(len, files.values()) + ) -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_ls(token_restore): with gcs_maker(True) as gcs: - fn = TEST_BUCKET+'/nested/file1' + fn = TEST_BUCKET + "/nested/file1" gcs.touch(fn) - assert fn not in gcs.ls(TEST_BUCKET+'/') - assert fn in gcs.ls(TEST_BUCKET+'/nested/') - assert fn in gcs.ls(TEST_BUCKET+'/nested') + assert fn not in gcs.ls(TEST_BUCKET + "/") + assert fn in gcs.ls(TEST_BUCKET + "/nested/") + assert fn in gcs.ls(TEST_BUCKET + "/nested") -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_ls_detail(token_restore): with gcs_maker(True) as gcs: - L = gcs.ls(TEST_BUCKET+'/nested', detail=True) + L = gcs.ls(TEST_BUCKET + "/nested", detail=True) assert all(isinstance(item, dict) for item in L) -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_gcs_glob(token_restore): with gcs_maker(True) as gcs: - fn = TEST_BUCKET+'/nested/file1' - assert fn not in gcs.glob(TEST_BUCKET+'/') - assert fn not in gcs.glob(TEST_BUCKET+'/*') - assert fn in gcs.glob(TEST_BUCKET+'/nested/') - assert fn in gcs.glob(TEST_BUCKET+'/nested/*') - assert fn in gcs.glob(TEST_BUCKET+'/nested/file*') - assert fn in gcs.glob(TEST_BUCKET+'/*/*') - assert fn in gcs.glob(TEST_BUCKET+'/**') - assert all(f in gcs.find(TEST_BUCKET) for f in - 
gcs.glob(TEST_BUCKET+'/nested/*') if gcs.isfile(f)) - - -@my_vcr.use_cassette(match=['all']) + fn = TEST_BUCKET + "/nested/file1" + assert fn not in gcs.glob(TEST_BUCKET + "/") + assert fn not in gcs.glob(TEST_BUCKET + "/*") + assert fn in gcs.glob(TEST_BUCKET + "/nested/") + assert fn in gcs.glob(TEST_BUCKET + "/nested/*") + assert fn in gcs.glob(TEST_BUCKET + "/nested/file*") + assert fn in gcs.glob(TEST_BUCKET + "/*/*") + assert fn in gcs.glob(TEST_BUCKET + "/**") + assert all( + f in gcs.find(TEST_BUCKET) + for f in gcs.glob(TEST_BUCKET + "/nested/*") + if gcs.isfile(f) + ) + + +@my_vcr.use_cassette(match=["all"]) def test_read_keys_from_bucket(token_restore): with gcs_maker(True) as gcs: for k, data in files.items(): - file_contents = gcs.cat('/'.join([TEST_BUCKET, k])) + file_contents = gcs.cat("/".join([TEST_BUCKET, k])) assert file_contents == data - assert all(gcs.cat('/'.join([TEST_BUCKET, k])) == - gcs.cat('gcs://' + '/'.join([TEST_BUCKET, k])) - for k in files) + assert all( + gcs.cat("/".join([TEST_BUCKET, k])) + == gcs.cat("gcs://" + "/".join([TEST_BUCKET, k])) + for k in files + ) -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_url(token_restore): with gcs_maker(True) as gcs: - fn = TEST_BUCKET+'/nested/file1' + fn = TEST_BUCKET + "/nested/file1" url = gcs.url(fn) - assert 'http' in url - assert quote_plus('nested/file1') in url + assert "http" in url + assert quote_plus("nested/file1") in url with gcs.open(fn) as f: - assert 'http' in f.url() + assert "http" in f.url() -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_seek(token_restore): with gcs_maker(True) as gcs: - with gcs.open(a, 'wb') as f: - f.write(b'123') + with gcs.open(a, "wb") as f: + f.write(b"123") with gcs.open(a) as f: f.seek(1000) @@ -268,183 +283,190 @@ def test_seek(token_restore): with pytest.raises(ValueError): f.seek(0, 10) f.seek(0) - assert f.read(1) == b'1' + assert f.read(1) == b"1" f.seek(0) - assert f.read(1) == b'1' + assert f.read(1) == b"1" f.seek(3) - assert f.read(1) == b'' + assert f.read(1) == b"" f.seek(-1, 2) - assert f.read(1) == b'3' + assert f.read(1) == b"3" f.seek(-1, 1) f.seek(-1, 1) - assert f.read(1) == b'2' + assert f.read(1) == b"2" for i in range(4): assert f.seek(i) == i -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_bad_open(token_restore): with gcs_maker() as gcs: with pytest.raises((IOError, OSError)): - gcs.open('') + gcs.open("") -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_copy(token_restore): with gcs_maker(True) as gcs: - fn = TEST_BUCKET+'/test/accounts.1.json' - gcs.copy(fn, fn+'2') - assert gcs.cat(fn) == gcs.cat(fn+'2') + fn = TEST_BUCKET + "/test/accounts.1.json" + gcs.copy(fn, fn + "2") + assert gcs.cat(fn) == gcs.cat(fn + "2") -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_move(token_restore): with gcs_maker(True) as gcs: - fn = TEST_BUCKET+'/test/accounts.1.json' + fn = TEST_BUCKET + "/test/accounts.1.json" data = gcs.cat(fn) - gcs.mv(fn, fn+'2') - assert gcs.cat(fn+'2') == data + gcs.mv(fn, fn + "2") + assert gcs.cat(fn + "2") == data assert not gcs.exists(fn) -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_get_put(token_restore): with gcs_maker(True) as gcs: with tmpfile() as fn: - gcs.get(TEST_BUCKET+'/test/accounts.1.json', fn) - data = files['test/accounts.1.json'] - assert open(fn, 'rb').read() == data - gcs.put(fn, TEST_BUCKET+'/temp') - 
assert gcs.du(TEST_BUCKET+'/temp') == len(data) - assert gcs.cat(TEST_BUCKET+'/temp') == data + gcs.get(TEST_BUCKET + "/test/accounts.1.json", fn) + data = files["test/accounts.1.json"] + assert open(fn, "rb").read() == data + gcs.put(fn, TEST_BUCKET + "/temp") + assert gcs.du(TEST_BUCKET + "/temp") == len(data) + assert gcs.cat(TEST_BUCKET + "/temp") == data -@pytest.mark.parametrize("protocol", ['', 'gs://', 'gcs://']) -@my_vcr.use_cassette(match=['all']) +@pytest.mark.parametrize("protocol", ["", "gs://", "gcs://"]) +@my_vcr.use_cassette(match=["all"]) def test_get_put_recursive(token_restore, protocol): with gcs_maker(True) as gcs: with tempdir() as dn: - gcs.get(protocol+TEST_BUCKET+'/test/', dn+'/temp_dir', recursive=True) + gcs.get(protocol + TEST_BUCKET + "/test/", dn + "/temp_dir", recursive=True) # there is now in local directory: # dn+'/temp_dir/accounts.1.json' # dn+'/temp_dir/accounts.2.json' - data1 = files['test/accounts.1.json'] - data2 = files['test/accounts.2.json'] - assert open(dn+'/temp_dir/accounts.1.json', 'rb').read() == data1 - assert open(dn+'/temp_dir/accounts.2.json', 'rb').read() == data2 - gcs.put(dn+'/temp_dir', protocol+TEST_BUCKET+'/temp_dir', recursive=True) + data1 = files["test/accounts.1.json"] + data2 = files["test/accounts.2.json"] + assert open(dn + "/temp_dir/accounts.1.json", "rb").read() == data1 + assert open(dn + "/temp_dir/accounts.2.json", "rb").read() == data2 + gcs.put( + dn + "/temp_dir", protocol + TEST_BUCKET + "/temp_dir", recursive=True + ) # there is now in remote directory: # protocol+TEST_BUCKET+'/temp_dir/accounts.1.json' # protocol+TEST_BUCKET+'/temp_dir/accounts.2.json' - assert gcs.du(protocol+TEST_BUCKET+'/temp_dir/accounts.1.json' - ) == len(data1) - assert gcs.cat(protocol+TEST_BUCKET+'/temp_dir/accounts.1.json') == data1 - assert gcs.du(protocol+TEST_BUCKET+'/temp_dir/accounts.2.json' - ) == len(data2) - assert gcs.cat(protocol+TEST_BUCKET+'/temp_dir/accounts.2.json') == data2 - - -@my_vcr.use_cassette(match=['all']) + assert gcs.du(protocol + TEST_BUCKET + "/temp_dir/accounts.1.json") == len( + data1 + ) + assert ( + gcs.cat(protocol + TEST_BUCKET + "/temp_dir/accounts.1.json") == data1 + ) + assert gcs.du(protocol + TEST_BUCKET + "/temp_dir/accounts.2.json") == len( + data2 + ) + assert ( + gcs.cat(protocol + TEST_BUCKET + "/temp_dir/accounts.2.json") == data2 + ) + + +@my_vcr.use_cassette(match=["all"]) def test_errors(token_restore): with gcs_maker() as gcs: with pytest.raises((IOError, OSError)): - gcs.open(TEST_BUCKET+'/tmp/test/shfoshf', 'rb') + gcs.open(TEST_BUCKET + "/tmp/test/shfoshf", "rb") ## This is fine, no need for interleving directories on gcs - #with pytest.raises((IOError, OSError)): + # with pytest.raises((IOError, OSError)): # gcs.touch('tmp/test/shfoshf/x') with pytest.raises((IOError, OSError)): - gcs.rm(TEST_BUCKET+'/tmp/test/shfoshf/x') + gcs.rm(TEST_BUCKET + "/tmp/test/shfoshf/x") with pytest.raises((IOError, OSError)): - gcs.mv(TEST_BUCKET+'/tmp/test/shfoshf/x', 'tmp/test/shfoshf/y') + gcs.mv(TEST_BUCKET + "/tmp/test/shfoshf/x", "tmp/test/shfoshf/y") with pytest.raises((IOError, OSError)): - gcs.open('x', 'rb') + gcs.open("x", "rb") with pytest.raises((IOError, OSError)): - gcs.rm('unknown') + gcs.rm("unknown") with pytest.raises(ValueError): - with gcs.open(TEST_BUCKET+'/temp', 'wb') as f: + with gcs.open(TEST_BUCKET + "/temp", "wb") as f: f.read() with pytest.raises(ValueError): - f = gcs.open(TEST_BUCKET+'/temp', 'rb') + f = gcs.open(TEST_BUCKET + "/temp", "rb") f.close() f.read() with 
pytest.raises(ValueError) as e: - gcs.mkdir('/') - assert 'bucket' in str(e) + gcs.mkdir("/") + assert "bucket" in str(e) -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_read_small(token_restore): with gcs_maker(True) as gcs: - fn = TEST_BUCKET+'/2014-01-01.csv' - with gcs.open(fn, 'rb', block_size=10) as f: + fn = TEST_BUCKET + "/2014-01-01.csv" + with gcs.open(fn, "rb", block_size=10) as f: out = [] while True: data = f.read(3) - if data == b'': + if data == b"": break out.append(data) - assert gcs.cat(fn) == b''.join(out) + assert gcs.cat(fn) == b"".join(out) # cache drop assert len(f.cache.cache) < len(out) -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_seek_delimiter(token_restore): with gcs_maker(True) as gcs: - fn = 'test/accounts.1.json' + fn = "test/accounts.1.json" data = files[fn] - with gcs.open('/'.join([TEST_BUCKET, fn])) as f: - seek_delimiter(f, b'}', 0) + with gcs.open("/".join([TEST_BUCKET, fn])) as f: + seek_delimiter(f, b"}", 0) assert f.tell() == 0 f.seek(1) - seek_delimiter(f, b'}', 5) - assert f.tell() == data.index(b'}') + 1 - seek_delimiter(f, b'\n', 5) - assert f.tell() == data.index(b'\n') + 1 + seek_delimiter(f, b"}", 5) + assert f.tell() == data.index(b"}") + 1 + seek_delimiter(f, b"\n", 5) + assert f.tell() == data.index(b"\n") + 1 f.seek(1, 1) - ind = data.index(b'\n') + data[data.index(b'\n')+1:].index(b'\n') + 1 - seek_delimiter(f, b'\n', 5) + ind = data.index(b"\n") + data[data.index(b"\n") + 1 :].index(b"\n") + 1 + seek_delimiter(f, b"\n", 5) assert f.tell() == ind + 1 -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_read_block(token_restore): with gcs_maker(True) as gcs: - data = files['test/accounts.1.json'] + data = files["test/accounts.1.json"] lines = io.BytesIO(data).readlines() - path = TEST_BUCKET+'/test/accounts.1.json' - assert gcs.read_block(path, 1, 35, b'\n') == lines[1] - assert gcs.read_block(path, 0, 30, b'\n') == lines[0] - assert gcs.read_block(path, 0, 35, b'\n') == lines[0] + lines[1] - out = gcs.read_block(path, 0, 5000, b'\n') - assert gcs.read_block(path, 0, 5000, b'\n') == data + path = TEST_BUCKET + "/test/accounts.1.json" + assert gcs.read_block(path, 1, 35, b"\n") == lines[1] + assert gcs.read_block(path, 0, 30, b"\n") == lines[0] + assert gcs.read_block(path, 0, 35, b"\n") == lines[0] + lines[1] + gcs.read_block(path, 0, 5000, b"\n") + assert gcs.read_block(path, 0, 5000, b"\n") == data assert len(gcs.read_block(path, 0, 5)) == 5 assert len(gcs.read_block(path, 4, 5000)) == len(data) - 4 - assert gcs.read_block(path, 5000, 5010) == b'' + assert gcs.read_block(path, 5000, 5010) == b"" assert gcs.read_block(path, 5, None) == gcs.read_block(path, 5, 1000) -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_flush(token_restore): with gcs_maker() as gcs: gcs.touch(a) - with gcs.open(a, 'rb') as ro: + with gcs.open(a, "rb") as ro: with pytest.raises(ValueError): ro.write(b"abc") ro.flush() - - with gcs.open(b, 'wb') as wo: + with gcs.open(b, "wb") as wo: wo.write(b"abc") wo.flush() assert not gcs.exists(b) @@ -454,153 +476,151 @@ def test_flush(token_restore): wo.write(b"abc") - -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_write_fails(token_restore): with gcs_maker() as gcs: with pytest.raises(ValueError): - gcs.touch(TEST_BUCKET+'/temp') - gcs.open(TEST_BUCKET+'/temp', 'rb').write(b'hello') + gcs.touch(TEST_BUCKET + "/temp") + gcs.open(TEST_BUCKET + "/temp", 
"rb").write(b"hello") - with gcs.open(TEST_BUCKET+'/temp', 'wb') as f: - f.write(b'hello') + with gcs.open(TEST_BUCKET + "/temp", "wb") as f: + f.write(b"hello") f.flush(force=True) with pytest.raises(ValueError): - f.write(b'world') + f.write(b"world") - f = gcs.open(TEST_BUCKET+'/temp', 'wb') + f = gcs.open(TEST_BUCKET + "/temp", "wb") f.close() with pytest.raises(ValueError): - f.write(b'hello') + f.write(b"hello") with pytest.raises((OSError, IOError)): - gcs.open('nonexistentbucket/temp', 'wb').close() + gcs.open("nonexistentbucket/temp", "wb").close() -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def text_mode(token_restore): - text = 'Hello µ' + text = "Hello µ" with gcs_maker() as gcs: - with gcs.open(TEST_BUCKET+'/temp', 'w') as f: + with gcs.open(TEST_BUCKET + "/temp", "w") as f: f.write(text) - with gcs.open(TEST_BUCKET+'/temp', 'r') as f: + with gcs.open(TEST_BUCKET + "/temp", "r") as f: assert f.read() == text -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_write_blocks(token_restore): with gcs_maker() as gcs: - with gcs.open(TEST_BUCKET+'/temp', 'wb', block_size=2**18) as f: - f.write(b'a' * 100000) + with gcs.open(TEST_BUCKET + "/temp", "wb", block_size=2 ** 18) as f: + f.write(b"a" * 100000) assert f.buffer.tell() == 100000 - assert not(f.offset) - f.write(b'a' * 100000) - f.write(b'a' * 100000) + assert not (f.offset) + f.write(b"a" * 100000) + f.write(b"a" * 100000) assert f.offset - assert gcs.info(TEST_BUCKET+'/temp')['size'] == 300000 + assert gcs.info(TEST_BUCKET + "/temp")["size"] == 300000 -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_write_blocks2(token_restore): with gcs_maker() as gcs: - with gcs.open(TEST_BUCKET+'/temp1', 'wb', block_size=2**18) as f: - f.write(b'a' * (2**18+1)) + with gcs.open(TEST_BUCKET + "/temp1", "wb", block_size=2 ** 18) as f: + f.write(b"a" * (2 ** 18 + 1)) # leftover bytes: GCS accepts blocks in multiples of 2**18 bytes assert f.buffer.tell() == 1 - assert gcs.info(TEST_BUCKET+'/temp1')['size'] == 2**18+1 + assert gcs.info(TEST_BUCKET + "/temp1")["size"] == 2 ** 18 + 1 -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_readline(token_restore): with gcs_maker(True) as gcs: - all_items = chain.from_iterable([ - files.items(), csv_files.items(), text_files.items() - ]) + all_items = chain.from_iterable( + [files.items(), csv_files.items(), text_files.items()] + ) for k, data in all_items: - with gcs.open('/'.join([TEST_BUCKET, k]), 'rb') as f: + with gcs.open("/".join([TEST_BUCKET, k]), "rb") as f: result = f.readline() - expected = data.split(b'\n')[0] + (b'\n' if data.count(b'\n') - else b'') + expected = data.split(b"\n")[0] + (b"\n" if data.count(b"\n") else b"") assert result == expected -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_readline_from_cache(token_restore): with gcs_maker() as gcs: - data = b'a,b\n11,22\n3,4' - with gcs.open(a, 'wb') as f: + data = b"a,b\n11,22\n3,4" + with gcs.open(a, "wb") as f: f.write(data) - with gcs.open(a, 'rb') as f: + with gcs.open(a, "rb") as f: result = f.readline() - assert result == b'a,b\n' + assert result == b"a,b\n" assert f.loc == 4 assert f.cache.cache == data result = f.readline() - assert result == b'11,22\n' + assert result == b"11,22\n" assert f.loc == 10 assert f.cache.cache == data result = f.readline() - assert result == b'3,4' + assert result == b"3,4" assert f.loc == 13 assert f.cache.cache == data 
-@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_readline_empty(token_restore): with gcs_maker() as gcs: - data = b'' - with gcs.open(a, 'wb') as f: + data = b"" + with gcs.open(a, "wb") as f: f.write(data) - with gcs.open(a, 'rb') as f: + with gcs.open(a, "rb") as f: result = f.readline() assert result == data -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_readline_blocksize(token_restore): with gcs_maker() as gcs: - data = b'ab\n' + b'a' * (2**18) + b'\nab' - with gcs.open(a, 'wb') as f: + data = b"ab\n" + b"a" * (2 ** 18) + b"\nab" + with gcs.open(a, "wb") as f: f.write(data) - with gcs.open(a, 'rb', block_size=2**18) as f: + with gcs.open(a, "rb", block_size=2 ** 18) as f: result = f.readline() - expected = b'ab\n' + expected = b"ab\n" assert result == expected result = f.readline() - expected = b'a' * (2**18) + b'\n' + expected = b"a" * (2 ** 18) + b"\n" assert result == expected result = f.readline() - expected = b'ab' + expected = b"ab" assert result == expected -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_next(token_restore): with gcs_maker(True) as gcs: - expected = csv_files['2014-01-01.csv'].split(b'\n')[0] + b'\n' - with gcs.open(TEST_BUCKET + '/2014-01-01.csv') as f: + expected = csv_files["2014-01-01.csv"].split(b"\n")[0] + b"\n" + with gcs.open(TEST_BUCKET + "/2014-01-01.csv") as f: result = next(f) assert result == expected -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_iterable(token_restore): with gcs_maker() as gcs: - data = b'abc\n123' - with gcs.open(a, 'wb') as f: + data = b"abc\n123" + with gcs.open(a, "wb") as f: f.write(data) with gcs.open(a) as f, io.BytesIO(data) as g: for fromgcs, fromio in zip(f, g): assert fromgcs == fromio f.seek(0) - assert f.readline() == b'abc\n' - assert f.readline() == b'123' + assert f.readline() == b"abc\n" + assert f.readline() == b"123" f.seek(1) - assert f.readline() == b'bc\n' + assert f.readline() == b"bc\n" with gcs.open(a) as f: out = list(f) @@ -610,62 +630,62 @@ def test_iterable(token_restore): assert b"".join(out) == data -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_readable(token_restore): with gcs_maker() as gcs: - with gcs.open(a, 'wb') as f: + with gcs.open(a, "wb") as f: assert not f.readable() - with gcs.open(a, 'rb') as f: + with gcs.open(a, "rb") as f: assert f.readable() -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_seekable(token_restore): with gcs_maker() as gcs: - with gcs.open(a, 'wb') as f: + with gcs.open(a, "wb") as f: assert not f.seekable() - with gcs.open(a, 'rb') as f: + with gcs.open(a, "rb") as f: assert f.seekable() -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_writable(token_restore): with gcs_maker() as gcs: - with gcs.open(a, 'wb') as f: + with gcs.open(a, "wb") as f: assert f.writable() - with gcs.open(a, 'rb') as f: + with gcs.open(a, "rb") as f: assert not f.writable() -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_merge(token_restore): with gcs_maker() as gcs: - with gcs.open(a, 'wb') as f: - f.write(b'a' * 100) + with gcs.open(a, "wb") as f: + f.write(b"a" * 100) - with gcs.open(b, 'wb') as f: - f.write(b'a' * 100) - gcs.merge(TEST_BUCKET+'/joined', [a, b]) - assert gcs.info(TEST_BUCKET+'/joined')['size'] == 200 + with gcs.open(b, "wb") as f: + f.write(b"a" * 100) + gcs.merge(TEST_BUCKET + "/joined", [a, b]) 
+ assert gcs.info(TEST_BUCKET + "/joined")["size"] == 200 -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_bigger_than_block_read(token_restore): with gcs_maker(True) as gcs: - with gcs.open(TEST_BUCKET+'/2014-01-01.csv', 'rb', block_size=3) as f: + with gcs.open(TEST_BUCKET + "/2014-01-01.csv", "rb", block_size=3) as f: out = [] while True: data = f.read(20) out.append(data) if len(data) == 0: break - assert b''.join(out) == csv_files['2014-01-01.csv'] + assert b"".join(out) == csv_files["2014-01-01.csv"] -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_current(token_restore): from google.auth import credentials @@ -673,42 +693,42 @@ def test_current(token_restore): assert GCSFileSystem.current() is gcs gcs2 = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN) assert gcs2.session is gcs.session - gcs2 = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN, - secure_serialize=False) + gcs2 = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN, secure_serialize=False) assert isinstance(gcs2.token, credentials.Credentials) -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_array(token_restore): with gcs_maker() as gcs: from array import array - data = array('B', [65] * 1000) - with gcs.open(a, 'wb') as f: + data = array("B", [65] * 1000) + + with gcs.open(a, "wb") as f: f.write(data) - with gcs.open(a, 'rb') as f: + with gcs.open(a, "rb") as f: out = f.read() - assert out == b'A' * 1000 + assert out == b"A" * 1000 -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_attrs(token_restore): with gcs_maker() as gcs: gcs.touch(a) - assert 'metadata' not in gcs.info(a) + assert "metadata" not in gcs.info(a) with pytest.raises(KeyError): - gcs.getxattr(a, 'foo') + gcs.getxattr(a, "foo") - gcs.touch(a, metadata={'foo': 'blob'}) - assert gcs.getxattr(a, 'foo') == 'blob' + gcs.touch(a, metadata={"foo": "blob"}) + assert gcs.getxattr(a, "foo") == "blob" - gcs.setxattrs(a, foo='blah') - assert gcs.getxattr(a, 'foo') == 'blah' + gcs.setxattrs(a, foo="blah") + assert gcs.getxattr(a, "foo") == "blah" - with gcs.open(a, 'wb') as f: - f.metadata = {'something': 'not'} + with gcs.open(a, "wb") as f: + f.metadata = {"something": "not"} with pytest.raises(KeyError): - gcs.getxattr(a, 'foo') - assert gcs.getxattr(a, 'something') == 'not' + gcs.getxattr(a, "foo") + assert gcs.getxattr(a, "something") == "not" diff --git a/gcsfs/tests/test_fuse.py b/gcsfs/tests/test_fuse.py index 3be4acaa10..390565f2be 100644 --- a/gcsfs/tests/test_fuse.py +++ b/gcsfs/tests/test_fuse.py @@ -1,26 +1,25 @@ import os import pytest -fuse = pytest.importorskip('fuse') + +fuse = pytest.importorskip("fuse") import tempfile -import sys from fsspec.fuse import run from gcsfs.tests.settings import TEST_BUCKET -from gcsfs.tests.utils import gcs_maker, token_restore, my_vcr -import gcsfs +from gcsfs.tests.utils import gcs_maker, my_vcr import threading import time @pytest.mark.xfail -@pytest.mark.skipif("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", - reason="Skipping this test on Travis CI.") -@my_vcr.use_cassette(match=['all']) +@pytest.mark.skipif( + "TRAVIS" in os.environ and os.environ["TRAVIS"] == "true", + reason="Skipping this test on Travis CI.", +) +@my_vcr.use_cassette(match=["all"]) def test_fuse(token_restore): mountpath = tempfile.mkdtemp() with gcs_maker() as gcs: - th = threading.Thread( - target=lambda: run(gcs, TEST_BUCKET + '/', mountpath) - ) + th = threading.Thread(target=lambda: run(gcs, 
TEST_BUCKET + "/", mountpath)) th.daemon = True th.start() @@ -28,19 +27,19 @@ def test_fuse(token_restore): timeout = 20 while True: try: - open(os.path.join(mountpath, 'lock'), 'w').close() - os.remove(os.path.join(mountpath, 'lock')) + open(os.path.join(mountpath, "lock"), "w").close() + os.remove(os.path.join(mountpath, "lock")) break - except: + except: # noqa: E722 time.sleep(0.5) timeout -= 0.5 assert timeout > 0 - with open(os.path.join(mountpath, 'hello'), 'w') as f: + with open(os.path.join(mountpath, "hello"), "w") as f: # NB this is in TEXT mode - f.write('hello') + f.write("hello") files = os.listdir(mountpath) - assert 'hello' in files - with open(os.path.join(mountpath, 'hello'), 'r') as f: + assert "hello" in files + with open(os.path.join(mountpath, "hello"), "r") as f: # NB this is in TEXT mode - assert f.read() == 'hello' + assert f.read() == "hello" diff --git a/gcsfs/tests/test_manyopens.py b/gcsfs/tests/test_manyopens.py index f05425a37b..af7bb1def3 100644 --- a/gcsfs/tests/test_manyopens.py +++ b/gcsfs/tests/test_manyopens.py @@ -16,19 +16,22 @@ def run(): if len(sys.argv) != 4: - print('usage: python -m gcsfs.tests.test_manyopens ' - ' ') + print( + "usage: python -m gcsfs.tests.test_manyopens " + ' ' + ) return project = sys.argv[1] credentials = sys.argv[2] file = sys.argv[3] - print('project: ' + project) + print("project: " + project) for i in range(2000): # Issue #12 only reproduces if I re-create the fs object every time. fs = gcsfs.GCSFileSystem(project=project, token=credentials) - print('attempt %s' % i) - with fs.open(file, 'rb') as o: + print("attempt %s" % i) + with fs.open(file, "rb") as o: o.readline() -if __name__ == '__main__': + +if __name__ == "__main__": run() diff --git a/gcsfs/tests/test_mapping.py b/gcsfs/tests/test_mapping.py index a6b65c2324..78646113ca 100644 --- a/gcsfs/tests/test_mapping.py +++ b/gcsfs/tests/test_mapping.py @@ -1,20 +1,18 @@ import pytest -from gcsfs.tests.settings import TEST_PROJECT, GOOGLE_TOKEN, TEST_BUCKET -from gcsfs.tests.utils import (tempdir, token_restore, my_vcr, gcs_maker, - files, csv_files, text_files, a, b, c, d) -from gcsfs import GCSFileSystem, core +from gcsfs.tests.settings import TEST_BUCKET +from gcsfs.tests.utils import my_vcr, gcs_maker -root = TEST_BUCKET+'/mapping' +root = TEST_BUCKET + "/mapping" def test_api(): import gcsfs - assert 'GCSMap' in dir(gcsfs) - assert 'mapping' in dir(gcsfs) + assert "GCSMap" in dir(gcsfs) + assert "mapping" in dir(gcsfs) -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_map_simple(token_restore): with gcs_maker() as gcs: d = gcs.get_mapper(root) @@ -25,143 +23,145 @@ def test_map_simple(token_restore): assert list(d.items()) == [] -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_map_default_gcsfilesystem(token_restore): with gcs_maker() as gcs: d = gcs.get_mapper(root) assert d.fs is gcs -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_map_errors(token_restore): with gcs_maker() as gcs: d = gcs.get_mapper(root) with pytest.raises(KeyError): - d['nonexistent'] + d["nonexistent"] try: - gcs.get_mapper('does-not-exist') + gcs.get_mapper("does-not-exist") except Exception as e: - assert 'does-not-exist' in str(e) + assert "does-not-exist" in str(e) -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_map_with_data(token_restore): with gcs_maker() as gcs: d = gcs.get_mapper(root) - d['x'] = b'123' - assert list(d) == list(d.keys()) == ['x'] - 
assert list(d.values()) == [b'123'] - assert list(d.items()) == [('x', b'123')] - assert d['x'] == b'123' + d["x"] = b"123" + assert list(d) == list(d.keys()) == ["x"] + assert list(d.values()) == [b"123"] + assert list(d.items()) == [("x", b"123")] + assert d["x"] == b"123" assert bool(d) - assert gcs.find(root) == [TEST_BUCKET+'/mapping/x'] - d['x'] = b'000' - assert d['x'] == b'000' + assert gcs.find(root) == [TEST_BUCKET + "/mapping/x"] + d["x"] = b"000" + assert d["x"] == b"000" - d['y'] = b'456' - assert d['y'] == b'456' - assert set(d) == {'x', 'y'} + d["y"] = b"456" + assert d["y"] == b"456" + assert set(d) == {"x", "y"} d.clear() assert list(d) == [] -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_map_complex_keys(token_restore): with gcs_maker() as gcs: d = gcs.get_mapper(root) - d[1] = b'hello' - assert d[1] == b'hello' + d[1] = b"hello" + assert d[1] == b"hello" del d[1] - d[1, 2] = b'world' - assert d[1, 2] == b'world' + d[1, 2] = b"world" + assert d[1, 2] == b"world" del d[1, 2] - d['x', 1, 2] = b'hello world' - assert d['x', 1, 2] == b'hello world' + d["x", 1, 2] = b"hello world" + assert d["x", 1, 2] == b"hello world" - assert ('x', 1, 2) in d + assert ("x", 1, 2) in d -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_map_clear_empty(token_restore): with gcs_maker() as gcs: d = gcs.get_mapper(root) d.clear() assert list(d) == [] - d[1] = b'1' - assert list(d) == ['1'] + d[1] = b"1" + assert list(d) == ["1"] d.clear() assert list(d) == [] -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_map_pickle(token_restore): with gcs_maker() as gcs: d = gcs.get_mapper(root) - d['x'] = b'1' - assert d['x'] == b'1' + d["x"] = b"1" + assert d["x"] == b"1" import pickle + d2 = pickle.loads(pickle.dumps(d)) - assert d2['x'] == b'1' + assert d2["x"] == b"1" -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_map_array(token_restore): with gcs_maker() as gcs: from array import array + d = gcs.get_mapper(root) - d['x'] = array('B', [65] * 1000) + d["x"] = array("B", [65] * 1000) - assert d['x'] == b'A' * 1000 + assert d["x"] == b"A" * 1000 -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_map_bytearray(token_restore): with gcs_maker() as gcs: - from array import array d = gcs.get_mapper(root) - d['x'] = bytearray(b'123') + d["x"] = bytearray(b"123") - assert d['x'] == b'123' + assert d["x"] == b"123" -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_new_bucket(token_restore): with gcs_maker() as gcs: - new_bucket = TEST_BUCKET + 'new-bucket' + new_bucket = TEST_BUCKET + "new-bucket" try: gcs.rmdir(new_bucket) - except: + except: # noqa: E722 pass with pytest.raises(Exception) as e: d = gcs.get_mapper(new_bucket, check=True) - assert 'create=True' in str(e.value) + assert "create=True" in str(e.value) try: d = gcs.get_mapper(new_bucket, create=True) assert not d - d = gcs.get_mapper(new_bucket + '/new-directory') + d = gcs.get_mapper(new_bucket + "/new-directory") assert not d finally: gcs.rmdir(new_bucket) -@my_vcr.use_cassette(match=['all']) +@my_vcr.use_cassette(match=["all"]) def test_map_pickle(token_restore): import pickle + with gcs_maker() as gcs: d = gcs.get_mapper(root) - d['x'] = b'1234567890' + d["x"] = b"1234567890" b = pickle.dumps(d) - assert b'1234567890' not in b + assert b"1234567890" not in b e = pickle.loads(b) - assert dict(e) == {'x': b'1234567890'} + assert dict(e) == {"x": 
b"1234567890"} diff --git a/gcsfs/tests/test_utils.py b/gcsfs/tests/test_utils.py index 76f3dafc53..2bc4f60069 100644 --- a/gcsfs/tests/test_utils.py +++ b/gcsfs/tests/test_utils.py @@ -1,65 +1,69 @@ import io import os import requests -from gcsfs.utils import read_block, seek_delimiter, HttpError, \ - RateLimitException, is_retriable +from gcsfs.utils import ( + read_block, + seek_delimiter, + HttpError, + RateLimitException, + is_retriable, +) from gcsfs.tests.utils import tmpfile def test_tempfile(): with tmpfile() as fn: - with open(fn, 'w'): + with open(fn, "w"): pass assert os.path.exists(fn) assert not os.path.exists(fn) def test_read_block(): - delimiter = b'\n' - data = delimiter.join([b'123', b'456', b'789']) + delimiter = b"\n" + data = delimiter.join([b"123", b"456", b"789"]) f = io.BytesIO(data) - assert read_block(f, 1, 2) == b'23' - assert read_block(f, 0, 1, delimiter=b'\n') == b'123\n' - assert read_block(f, 0, 2, delimiter=b'\n') == b'123\n' - assert read_block(f, 0, 3, delimiter=b'\n') == b'123\n' - assert read_block(f, 0, 5, delimiter=b'\n') == b'123\n456\n' - assert read_block(f, 0, 8, delimiter=b'\n') == b'123\n456\n789' - assert read_block(f, 0, 100, delimiter=b'\n') == b'123\n456\n789' - assert read_block(f, 1, 1, delimiter=b'\n') == b'' - assert read_block(f, 1, 5, delimiter=b'\n') == b'456\n' - assert read_block(f, 1, 8, delimiter=b'\n') == b'456\n789' + assert read_block(f, 1, 2) == b"23" + assert read_block(f, 0, 1, delimiter=b"\n") == b"123\n" + assert read_block(f, 0, 2, delimiter=b"\n") == b"123\n" + assert read_block(f, 0, 3, delimiter=b"\n") == b"123\n" + assert read_block(f, 0, 5, delimiter=b"\n") == b"123\n456\n" + assert read_block(f, 0, 8, delimiter=b"\n") == b"123\n456\n789" + assert read_block(f, 0, 100, delimiter=b"\n") == b"123\n456\n789" + assert read_block(f, 1, 1, delimiter=b"\n") == b"" + assert read_block(f, 1, 5, delimiter=b"\n") == b"456\n" + assert read_block(f, 1, 8, delimiter=b"\n") == b"456\n789" - for ols in [[(0, 3), (3, 3), (6, 3), (9, 2)], - [(0, 4), (4, 4), (8, 4)]]: - out = [read_block(f, o, l, b'\n') for o, l in ols] + for ols in [[(0, 3), (3, 3), (6, 3), (9, 2)], [(0, 4), (4, 4), (8, 4)]]: + out = [read_block(f, o, l, b"\n") for o, l in ols] assert b"".join(filter(None, out)) == data def test_seek_delimiter_endline(): - f = io.BytesIO(b'123\n456\n789') + f = io.BytesIO(b"123\n456\n789") # if at zero, stay at zero - seek_delimiter(f, b'\n', 5) + seek_delimiter(f, b"\n", 5) assert f.tell() == 0 # choose the first block for bs in [1, 5, 100]: f.seek(1) - seek_delimiter(f, b'\n', blocksize=bs) + seek_delimiter(f, b"\n", blocksize=bs) assert f.tell() == 4 # handle long delimiters well, even with short blocksizes - f = io.BytesIO(b'123abc456abc789') + f = io.BytesIO(b"123abc456abc789") for bs in [1, 2, 3, 4, 5, 6, 10]: f.seek(1) - seek_delimiter(f, b'abc', blocksize=bs) + seek_delimiter(f, b"abc", blocksize=bs) assert f.tell() == 6 # End at the end - f = io.BytesIO(b'123\n456') + f = io.BytesIO(b"123\n456") f.seek(5) - seek_delimiter(f, b'\n', 5) + seek_delimiter(f, b"\n", 5) assert f.tell() == 7 @@ -68,19 +72,19 @@ def retriable_exception(): assert is_retriable(e) e = ValueError assert not is_retriable(e) - e = HttpError({'message': '', 'code': 500}) + e = HttpError({"message": "", "code": 500}) assert is_retriable(e) - e = HttpError({'message': '', 'code': '500'}) + e = HttpError({"message": "", "code": "500"}) assert is_retriable(e) - e = HttpError({'message': '', 'code': 400}) + e = HttpError({"message": "", "code": 400}) assert not 
is_retriable(e) e = HttpError() assert not is_retriable(e) e = RateLimitException() assert not is_retriable(e) - e = RateLimitException({'message': '', 'code': 501}) + e = RateLimitException({"message": "", "code": 501}) assert is_retriable(e) - e = RateLimitException({'message': '', 'code': '501'}) + e = RateLimitException({"message": "", "code": "501"}) assert is_retriable(e) - e = RateLimitException({'message': '', 'code': 400}) + e = RateLimitException({"message": "", "code": 400}) assert not is_retriable(e) diff --git a/gcsfs/tests/utils.py b/gcsfs/tests/utils.py index 2e8cdeabfc..daedb48154 100644 --- a/gcsfs/tests/utils.py +++ b/gcsfs/tests/utils.py @@ -4,51 +4,57 @@ import os import shutil import re -import pytest import pickle -import sys import tempfile -import vcr import gcsfs.utils from gcsfs.core import GCSFileSystem -from gcsfs.tests.settings import (TEST_BUCKET, TEST_PROJECT, RECORD_MODE, - GOOGLE_TOKEN, FAKE_GOOGLE_TOKEN, DEBUG) +from gcsfs.tests.settings import ( + TEST_BUCKET, + TEST_PROJECT, + RECORD_MODE, + GOOGLE_TOKEN, + DEBUG, +) import vcr -import requests import logging if DEBUG: - logging.basicConfig() # you need to initialize logging, otherwise you will not see anything from vcrpy + logging.basicConfig() # you need to initialize logging, otherwise you will not see anything from vcrpy vcr_log = logging.getLogger("vcr") vcr_log.setLevel(logging.DEBUG) def before_record_response(response): r = pickle.loads(pickle.dumps(response)) - for field in ['Alt-Svc', 'Date', 'Expires', 'X-GUploader-UploadID']: - r['headers'].pop(field, None) - if 'Location' in r['headers']: - r['headers']['Location'] = [r['headers']['Location'][0].replace( - TEST_BUCKET, 'gcsfs-testing')] + for field in ["Alt-Svc", "Date", "Expires", "X-GUploader-UploadID"]: + r["headers"].pop(field, None) + if "Location" in r["headers"]: + r["headers"]["Location"] = [ + r["headers"]["Location"][0].replace(TEST_BUCKET, "gcsfs-testing") + ] try: try: - data = json.loads(gzip.decompress(r['body']['string']).decode()) - if 'access_token' in data: - data['access_token'] = 'xxx' - if 'id_token' in data: - data['id_token'] = 'xxx' - if 'refresh_token' in data: - data['refresh_token'] = 'xxx' - r['body']['string'] = gzip.compress( - json.dumps(data).replace( - TEST_PROJECT, 'test_project').replace( - TEST_BUCKET, 'gcsfs-testing').encode()) + data = json.loads(gzip.decompress(r["body"]["string"]).decode()) + if "access_token" in data: + data["access_token"] = "xxx" + if "id_token" in data: + data["id_token"] = "xxx" + if "refresh_token" in data: + data["refresh_token"] = "xxx" + r["body"]["string"] = gzip.compress( + json.dumps(data) + .replace(TEST_PROJECT, "test_project") + .replace(TEST_BUCKET, "gcsfs-testing") + .encode() + ) except (OSError, TypeError, ValueError): - r['body']['string'] = r['body']['string'].replace( - TEST_PROJECT.encode(), b'test_project').replace( - TEST_BUCKET.encode(), b'gcsfs-testing') + r["body"]["string"] = ( + r["body"]["string"] + .replace(TEST_PROJECT.encode(), b"test_project") + .replace(TEST_BUCKET.encode(), b"gcsfs-testing") + ) except Exception: pass return r @@ -56,86 +62,100 @@ def before_record_response(response): def before_record(request): r = pickle.loads(pickle.dumps(request)) - for field in ['User-Agent']: + for field in ["User-Agent"]: r.headers.pop(field, None) - r.uri = request.uri.replace(TEST_PROJECT, 'test_project').replace( - TEST_BUCKET, 'gcsfs-testing') + r.uri = request.uri.replace(TEST_PROJECT, "test_project").replace( + TEST_BUCKET, "gcsfs-testing" + ) if 
r.body: for field in GOOGLE_TOKEN: - r.body = r.body.replace(GOOGLE_TOKEN[field].encode(), b'xxx') - r.body = r.body.replace(TEST_PROJECT.encode(), b'test_project').replace( - TEST_BUCKET.encode(), b'gcsfs-testing') - r.body = re.sub(b'refresh_token=[^&]+', b'refresh_token=xxx', r.body) + r.body = r.body.replace(GOOGLE_TOKEN[field].encode(), b"xxx") + r.body = r.body.replace(TEST_PROJECT.encode(), b"test_project").replace( + TEST_BUCKET.encode(), b"gcsfs-testing" + ) + r.body = re.sub(b"refresh_token=[^&]+", b"refresh_token=xxx", r.body) return r def matcher(r1, r2): - if r2.uri.replace(TEST_PROJECT, 'test_project').replace( - TEST_BUCKET, 'gcsfs-testing') != r1.uri: + if ( + r2.uri.replace(TEST_PROJECT, "test_project").replace( + TEST_BUCKET, "gcsfs-testing" + ) + != r1.uri + ): return False if r1.method != r2.method: return False - if r1.method != 'POST' and r1.body != r2.body: + if r1.method != "POST" and r1.body != r2.body: return False - if r1.method == 'POST': + if r1.method == "POST": try: return json.loads(r2.body.decode()) == json.loads(r1.body.decode()) - except: + except: # noqa: E722 pass - r1q = (r1.body or b'').split(b'&') - r2q = (r2.body or b'').split(b'&') + r1q = (r1.body or b"").split(b"&") + r2q = (r2.body or b"").split(b"&") for q in r1q: - if b'secret' in q or b'token' in q: + if b"secret" in q or b"token" in q: continue if q not in r2q: return False else: - for key in ['Content-Length', 'Content-Type', 'Range']: + for key in ["Content-Length", "Content-Type", "Range"]: if key in r1.headers and key in r2.headers: - if r1.headers.get(key, '') != r2.headers.get(key, ''): + if r1.headers.get(key, "") != r2.headers.get(key, ""): return False return True -recording_path = os.path.join(os.path.dirname(__file__), 'recordings') + +recording_path = os.path.join(os.path.dirname(__file__), "recordings") my_vcr = vcr.VCR( cassette_library_dir=recording_path, record_mode=RECORD_MODE, - path_transformer=vcr.VCR.ensure_suffix('.yaml'), - filter_headers=['Authorization'], - filter_query_parameters=['refresh_token', 'client_id', - 'client_secret'], + path_transformer=vcr.VCR.ensure_suffix(".yaml"), + filter_headers=["Authorization"], + filter_query_parameters=["refresh_token", "client_id", "client_secret"], before_record_response=before_record_response, - before_record=before_record - ) -my_vcr.register_matcher('all', matcher) -my_vcr.match_on = ['all'] -files = {'test/accounts.1.json': (b'{"amount": 100, "name": "Alice"}\n' - b'{"amount": 200, "name": "Bob"}\n' - b'{"amount": 300, "name": "Charlie"}\n' - b'{"amount": 400, "name": "Dennis"}\n'), - 'test/accounts.2.json': (b'{"amount": 500, "name": "Alice"}\n' - b'{"amount": 600, "name": "Bob"}\n' - b'{"amount": 700, "name": "Charlie"}\n' - b'{"amount": 800, "name": "Dennis"}\n')} - -csv_files = {'2014-01-01.csv': (b'name,amount,id\n' - b'Alice,100,1\n' - b'Bob,200,2\n' - b'Charlie,300,3\n'), - '2014-01-02.csv': (b'name,amount,id\n'), - '2014-01-03.csv': (b'name,amount,id\n' - b'Dennis,400,4\n' - b'Edith,500,5\n' - b'Frank,600,6\n')} -text_files = {'nested/file1': b'hello\n', - 'nested/file2': b'world', - 'nested/nested2/file1': b'hello\n', - 'nested/nested2/file2': b'world'} -a = TEST_BUCKET+'/tmp/test/a' -b = TEST_BUCKET+'/tmp/test/b' -c = TEST_BUCKET+'/tmp/test/c' -d = TEST_BUCKET+'/tmp/test/d' + before_record=before_record, +) +my_vcr.register_matcher("all", matcher) +my_vcr.match_on = ["all"] +files = { + "test/accounts.1.json": ( + b'{"amount": 100, "name": "Alice"}\n' + b'{"amount": 200, "name": "Bob"}\n' + b'{"amount": 300, 
"name": "Charlie"}\n' + b'{"amount": 400, "name": "Dennis"}\n' + ), + "test/accounts.2.json": ( + b'{"amount": 500, "name": "Alice"}\n' + b'{"amount": 600, "name": "Bob"}\n' + b'{"amount": 700, "name": "Charlie"}\n' + b'{"amount": 800, "name": "Dennis"}\n' + ), +} + +csv_files = { + "2014-01-01.csv": ( + b"name,amount,id\n" b"Alice,100,1\n" b"Bob,200,2\n" b"Charlie,300,3\n" + ), + "2014-01-02.csv": b"name,amount,id\n", + "2014-01-03.csv": ( + b"name,amount,id\n" b"Dennis,400,4\n" b"Edith,500,5\n" b"Frank,600,6\n" + ), +} +text_files = { + "nested/file1": b"hello\n", + "nested/file2": b"world", + "nested/nested2/file1": b"hello\n", + "nested/nested2/file2": b"world", +} +a = TEST_BUCKET + "/tmp/test/a" +b = TEST_BUCKET + "/tmp/test/b" +c = TEST_BUCKET + "/tmp/test/c" +d = TEST_BUCKET + "/tmp/test/d" @contextmanager @@ -159,8 +179,8 @@ def tempdir(dir=None): @contextmanager -def tmpfile(extension='', dir=None): - extension = '.' + extension.lstrip('.') +def tmpfile(extension="", dir=None): + extension = "." + extension.lstrip(".") handle, filename = tempfile.mkstemp(extension, dir=dir) os.close(handle) os.remove(filename) @@ -176,25 +196,15 @@ def tmpfile(extension='', dir=None): os.remove(filename) -@pytest.yield_fixture -def token_restore(): - cache = GCSFileSystem.tokens - try: - GCSFileSystem.tokens = {} - yield - finally: - GCSFileSystem.tokens = cache - GCSFileSystem._save_tokens() - - @contextmanager def gcs_maker(populate=False): gcs = GCSFileSystem(TEST_PROJECT, token=GOOGLE_TOKEN) gcs.invalidate_cache() try: try: - gcs.mkdir(TEST_BUCKET, default_acl="authenticatedread", - acl="publicReadWrite") + gcs.mkdir( + TEST_BUCKET, default_acl="authenticatedread", acl="publicReadWrite" + ) except gcsfs.utils.HttpError: pass @@ -209,7 +219,7 @@ def gcs_maker(populate=False): if populate: for flist in [files, csv_files, text_files]: for fname, data in flist.items(): - with gcs.open(TEST_BUCKET+'/'+fname, 'wb') as f: + with gcs.open(TEST_BUCKET + "/" + fname, "wb") as f: f.write(data) gcs.invalidate_cache() yield gcs @@ -217,5 +227,5 @@ def gcs_maker(populate=False): for f in gcs.find(TEST_BUCKET): try: gcs.rm(f) - except: + except: # noqa: E722 pass diff --git a/gcsfs/utils.py b/gcsfs/utils.py index 29354d209d..4a8926d717 100644 --- a/gcsfs/utils.py +++ b/gcsfs/utils.py @@ -20,7 +20,7 @@ def seek_delimiter(file, delimiter, blocksize): if file.tell() == 0: return - last = b'' + last = b"" while True: current = file.read(blocksize) if not current: @@ -32,7 +32,7 @@ def seek_delimiter(file, delimiter, blocksize): return except ValueError: pass - last = full[-len(delimiter):] + last = full[-len(delimiter) :] def read_block(f, offset, length, delimiter=None): @@ -71,12 +71,12 @@ def read_block(f, offset, length, delimiter=None): """ if delimiter: f.seek(offset) - seek_delimiter(f, delimiter, 2**16) + seek_delimiter(f, delimiter, 2 ** 16) start = f.tell() length -= start - offset f.seek(start + length) - seek_delimiter(f, delimiter, 2**16) + seek_delimiter(f, delimiter, 2 ** 16) end = f.tell() offset = start @@ -89,21 +89,23 @@ def read_block(f, offset, length, delimiter=None): class RateLimitException(Exception): """Holds the message and code from cloud errors.""" + def __init__(self, error_response=None): - self.message = error_response.get('message', '') - self.code = error_response.get('code', None) + self.message = error_response.get("message", "") + self.code = error_response.get("code", None) # Call the base class constructor with the parameters it needs super(RateLimitException, 
self).__init__(self.message) class HttpError(Exception): """Holds the message and code from cloud errors.""" + def __init__(self, error_response=None): if error_response: - self.message = error_response.get('message', '') - self.code = error_response.get('code', None) + self.message = error_response.get("message", "") + self.code = error_response.get("code", None) else: - self.message = '' + self.message = "" self.code = None # Call the base class constructor with the parameters it needs super(HttpError, self).__init__(self.message) diff --git a/setup.cfg b/setup.cfg index c5b6847838..d6cc0c6ccb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,3 +7,18 @@ tag_prefix = [bdist_wheel] universal=1 + +[flake8] +exclude = versioneer.py,docs/source/conf.py +ignore = + E20, # Extra space in brackets + E231,E241, # Multiple spaces around "," + E26, # Comments + E4, # Import formatting + E721, # Comparing types instead of isinstance + E731, # Assigning lambda expression + E741, # Ambiguous variable names + W503, # line break before binary operator + W504, # line break after binary operator + F811, # redefinition of unused 'loop' from line 10 +max-line-length = 120 diff --git a/setup.py b/setup.py index 76120d1503..7df5dab421 100755 --- a/setup.py +++ b/setup.py @@ -5,30 +5,31 @@ import versioneer -setup(name='gcsfs', - version=versioneer.get_version(), - cmdclass=versioneer.get_cmdclass(), - description='Convenient Filesystem interface over GCS', - url='https://github.com/dask/gcsfs', - maintainer='Martin Durant', - maintainer_email='mdurant@anaconda.com', - license='BSD', - classifiers=[ - 'Development Status :: 4 - Beta', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: BSD License', - 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - ], - keywords=['google-cloud-storage', 'gcloud', 'file-system'], - packages=['gcsfs', 'gcsfs.cli'], - install_requires=[open('requirements.txt').read().strip().split('\n')], - long_description=(open('README.rst').read() - if os.path.exists('README.rst') else ''), - extras_require={ - "gcsfuse": ["fusepy"] - }, - python_requires='>=3.5', - zip_safe=False) +setup( + name="gcsfs", + version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), + description="Convenient Filesystem interface over GCS", + url="https://github.com/dask/gcsfs", + maintainer="Martin Durant", + maintainer_email="mdurant@anaconda.com", + license="BSD", + classifiers=[ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + ], + keywords=["google-cloud-storage", "gcloud", "file-system"], + packages=["gcsfs", "gcsfs.cli"], + install_requires=[open("requirements.txt").read().strip().split("\n")], + long_description=( + open("README.rst").read() if os.path.exists("README.rst") else "" + ), + extras_require={"gcsfuse": ["fusepy"]}, + python_requires=">=3.5", + zip_safe=False, +) diff --git a/versioneer.py b/versioneer.py index 64fea1c892..2b54540510 100644 --- a/versioneer.py +++ b/versioneer.py @@ -1,4 +1,3 @@ - # Version: 0.18 """The Versioneer - like a rocketeer, but for versions. 
@@ -277,6 +276,7 @@ """ from __future__ import print_function + try: import configparser except ImportError: @@ -308,11 +308,13 @@ def get_root(): setup_py = os.path.join(root, "setup.py") versioneer_py = os.path.join(root, "versioneer.py") if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): - err = ("Versioneer was unable to run the project root directory. " - "Versioneer requires setup.py to be executed from " - "its immediate directory (like 'python setup.py COMMAND'), " - "or in a way that lets it use sys.argv[0] to find the root " - "(like 'python path/to/setup.py COMMAND').") + err = ( + "Versioneer was unable to run the project root directory. " + "Versioneer requires setup.py to be executed from " + "its immediate directory (like 'python setup.py COMMAND'), " + "or in a way that lets it use sys.argv[0] to find the root " + "(like 'python path/to/setup.py COMMAND')." + ) raise VersioneerBadRootError(err) try: # Certain runtime workflows (setup.py install/develop in a setuptools @@ -325,8 +327,10 @@ def get_root(): me_dir = os.path.normcase(os.path.splitext(me)[0]) vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) if me_dir != vsr_dir: - print("Warning: build in %s is using versioneer.py from %s" - % (os.path.dirname(me), versioneer_py)) + print( + "Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(me), versioneer_py) + ) except NameError: pass return root @@ -348,6 +352,7 @@ def get(parser, name): if parser.has_option("versioneer", name): return parser.get("versioneer", name) return None + cfg = VersioneerConfig() cfg.VCS = VCS cfg.style = get(parser, "style") or "" @@ -372,17 +377,18 @@ class NotThisMethod(Exception): def register_vcs_handler(vcs, method): # decorator """Decorator to mark a method as the handler for a particular VCS.""" + def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f + return decorate -def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, - env=None): +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) p = None @@ -390,10 +396,13 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, try: dispcmd = str([c] + args) # remember shell=False, so use git.cmd on windows, not just git - p = subprocess.Popen([c] + args, cwd=cwd, env=env, - stdout=subprocess.PIPE, - stderr=(subprocess.PIPE if hide_stderr - else None)) + p = subprocess.Popen( + [c] + args, + cwd=cwd, + env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr else None), + ) break except EnvironmentError: e = sys.exc_info()[1] @@ -418,7 +427,9 @@ def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, return stdout, p.returncode -LONG_VERSION_PY['git'] = ''' +LONG_VERSION_PY[ + "git" +] = ''' # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build @@ -993,7 +1004,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. 
TAG = "tag: " - tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)]) + tags = set([r[len(TAG) :] for r in refs if r.startswith(TAG)]) if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d @@ -1002,7 +1013,7 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". - tags = set([r for r in refs if re.search(r'\d', r)]) + tags = set([r for r in refs if re.search(r"\d", r)]) if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: @@ -1010,19 +1021,26 @@ def git_versions_from_keywords(keywords, tag_prefix, verbose): for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): - r = ref[len(tag_prefix):] + r = ref[len(tag_prefix) :] if verbose: print("picking %s" % r) - return {"version": r, - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": None, - "date": date} + return { + "version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": None, + "date": date, + } # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") - return {"version": "0+unknown", - "full-revisionid": keywords["full"].strip(), - "dirty": False, "error": "no suitable tags", "date": None} + return { + "version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, + "error": "no suitable tags", + "date": None, + } @register_vcs_handler("git", "pieces_from_vcs") @@ -1037,8 +1055,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] - out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, - hide_stderr=True) + out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) @@ -1046,10 +1063,19 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) - describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty", - "--always", "--long", - "--match", "%s*" % tag_prefix], - cwd=root) + describe_out, rc = run_command( + GITS, + [ + "describe", + "--tags", + "--dirty", + "--always", + "--long", + "--match", + "%s*" % tag_prefix, + ], + cwd=root, + ) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") @@ -1072,17 +1098,16 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: - git_describe = git_describe[:git_describe.rindex("-dirty")] + git_describe = git_describe[: git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX - mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: # unparseable. Maybe git-describe is misbehaving? 
- pieces["error"] = ("unable to parse git-describe output: '%s'" - % describe_out) + pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces # tag @@ -1091,10 +1116,12 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) - pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" - % (full_tag, tag_prefix)) + pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( + full_tag, + tag_prefix, + ) return pieces - pieces["closest-tag"] = full_tag[len(tag_prefix):] + pieces["closest-tag"] = full_tag[len(tag_prefix) :] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) @@ -1105,13 +1132,13 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): else: # HEX: no tags pieces["closest-tag"] = None - count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], - cwd=root) + count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() - date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], - cwd=root)[0].strip() + date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[ + 0 + ].strip() pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces @@ -1167,16 +1194,22 @@ def versions_from_parentdir(parentdir_prefix, root, verbose): for i in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): - return {"version": dirname[len(parentdir_prefix):], - "full-revisionid": None, - "dirty": False, "error": None, "date": None} + return { + "version": dirname[len(parentdir_prefix) :], + "full-revisionid": None, + "dirty": False, + "error": None, + "date": None, + } else: rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: - print("Tried directories %s but none started with prefix %s" % - (str(rootdirs), parentdir_prefix)) + print( + "Tried directories %s but none started with prefix %s" + % (str(rootdirs), parentdir_prefix) + ) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @@ -1205,11 +1238,13 @@ def versions_from_file(filename): contents = f.read() except EnvironmentError: raise NotThisMethod("unable to read _version.py") - mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) + mo = re.search( + r"version_json = '''\n(.*)''' # END VERSION_JSON", contents, re.M | re.S + ) if not mo: - mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", - contents, re.M | re.S) + mo = re.search( + r"version_json = '''\r\n(.*)''' # END VERSION_JSON", contents, re.M | re.S + ) if not mo: raise NotThisMethod("no version_json in _version.py") return json.loads(mo.group(1)) @@ -1218,8 +1253,7 @@ def write_to_version_file(filename, versions): """Write the given version number to the given _version.py file.""" os.unlink(filename) - contents = json.dumps(versions, sort_keys=True, - indent=1, separators=(",", ": ")) + contents = json.dumps(versions, sort_keys=True, indent=1, separators=(",", ": ")) with open(filename, "w") as f: f.write(SHORT_VERSION_PY % contents) @@ -1251,8 +1285,7 @@ def render_pep440(pieces): rendered += ".dirty" else: # exception #1 - rendered = "0+untagged.%d.g%s" % (pieces["distance"], - pieces["short"]) + rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
if pieces["dirty"]: rendered += ".dirty" return rendered @@ -1366,11 +1399,13 @@ def render_git_describe_long(pieces): def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: - return {"version": "unknown", - "full-revisionid": pieces.get("long"), - "dirty": None, - "error": pieces["error"], - "date": None} + return { + "version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None, + } if not style or style == "default": style = "pep440" # the default @@ -1390,9 +1425,13 @@ def render(pieces, style): else: raise ValueError("unknown style '%s'" % style) - return {"version": rendered, "full-revisionid": pieces["long"], - "dirty": pieces["dirty"], "error": None, - "date": pieces.get("date")} + return { + "version": rendered, + "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], + "error": None, + "date": pieces.get("date"), + } class VersioneerBadRootError(Exception): @@ -1415,8 +1454,9 @@ def get_versions(verbose=False): handlers = HANDLERS.get(cfg.VCS) assert handlers, "unrecognized VCS '%s'" % cfg.VCS verbose = verbose or cfg.verbose - assert cfg.versionfile_source is not None, \ - "please set versioneer.versionfile_source" + assert ( + cfg.versionfile_source is not None + ), "please set versioneer.versionfile_source" assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" versionfile_abs = os.path.join(root, cfg.versionfile_source) @@ -1470,9 +1510,13 @@ def get_versions(verbose=False): if verbose: print("unable to compute version") - return {"version": "0+unknown", "full-revisionid": None, - "dirty": None, "error": "unable to compute version", - "date": None} + return { + "version": "0+unknown", + "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", + "date": None, + } def get_version(): @@ -1521,6 +1565,7 @@ def run(self): print(" date: %s" % vers.get("date")) if vers["error"]: print(" error: %s" % vers["error"]) + cmds["version"] = cmd_version # we override "build_py" in both distutils and setuptools @@ -1553,14 +1598,15 @@ def run(self): # now locate _version.py in the new build/ directory and replace # it with an updated value if cfg.versionfile_build: - target_versionfile = os.path.join(self.build_lib, - cfg.versionfile_build) + target_versionfile = os.path.join(self.build_lib, cfg.versionfile_build) print("UPDATING %s" % target_versionfile) write_to_version_file(target_versionfile, versions) + cmds["build_py"] = cmd_build_py if "cx_Freeze" in sys.modules: # cx_freeze enabled? from cx_Freeze.dist import build_exe as _build_exe + # nczeczulin reports that py2exe won't like the pep440-style string # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. # setup(console=[{ @@ -1581,17 +1627,21 @@ def run(self): os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + cmds["build_exe"] = cmd_build_exe del cmds["build_py"] - if 'py2exe' in sys.modules: # py2exe enabled? + if "py2exe" in sys.modules: # py2exe enabled? 
try: from py2exe.distutils_buildexe import py2exe as _py2exe # py3 except ImportError: @@ -1610,13 +1660,17 @@ def run(self): os.unlink(target_versionfile) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % - {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + cmds["py2exe"] = cmd_py2exe # we override different "sdist" commands for both environments @@ -1643,8 +1697,10 @@ def make_release_tree(self, base_dir, files): # updated value target_versionfile = os.path.join(base_dir, cfg.versionfile_source) print("UPDATING %s" % target_versionfile) - write_to_version_file(target_versionfile, - self._versioneer_generated_versions) + write_to_version_file( + target_versionfile, self._versioneer_generated_versions + ) + cmds["sdist"] = cmd_sdist return cmds @@ -1699,11 +1755,13 @@ def do_setup(): root = get_root() try: cfg = get_config_from_root(root) - except (EnvironmentError, configparser.NoSectionError, - configparser.NoOptionError) as e: + except ( + EnvironmentError, + configparser.NoSectionError, + configparser.NoOptionError, + ) as e: if isinstance(e, (EnvironmentError, configparser.NoSectionError)): - print("Adding sample versioneer config to setup.cfg", - file=sys.stderr) + print("Adding sample versioneer config to setup.cfg", file=sys.stderr) with open(os.path.join(root, "setup.cfg"), "a") as f: f.write(SAMPLE_CONFIG) print(CONFIG_ERROR, file=sys.stderr) @@ -1712,15 +1770,18 @@ def do_setup(): print(" creating %s" % cfg.versionfile_source) with open(cfg.versionfile_source, "w") as f: LONG = LONG_VERSION_PY[cfg.VCS] - f.write(LONG % {"DOLLAR": "$", - "STYLE": cfg.style, - "TAG_PREFIX": cfg.tag_prefix, - "PARENTDIR_PREFIX": cfg.parentdir_prefix, - "VERSIONFILE_SOURCE": cfg.versionfile_source, - }) - - ipy = os.path.join(os.path.dirname(cfg.versionfile_source), - "__init__.py") + f.write( + LONG + % { + "DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + } + ) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), "__init__.py") if os.path.exists(ipy): try: with open(ipy, "r") as f: @@ -1762,8 +1823,10 @@ def do_setup(): else: print(" 'versioneer.py' already in MANIFEST.in") if cfg.versionfile_source not in simple_includes: - print(" appending versionfile_source ('%s') to MANIFEST.in" % - cfg.versionfile_source) + print( + " appending versionfile_source ('%s') to MANIFEST.in" + % cfg.versionfile_source + ) with open(manifest_in, "a") as f: f.write("include %s\n" % cfg.versionfile_source) else: