Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delay dependency checks #447

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 36 additions & 42 deletions pangolin/command.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,6 @@
#!/usr/bin/env python3
from . import _program
from pangolin import __version__
from pangolin.utils import data_checks
try:
import pangolin_data
except ImportError:
data_checks.install_error("pangolin_data", "https://github.com/cov-lineages/pangolin-data.git")

try:
import scorpio
except ImportError:
data_checks.install_error("scorpio", "https://github.com/cov-lineages/scorpio.git")

try:
import constellations
except ImportError:
data_checks.install_error("constellations", "https://github.com/cov-lineages/constellations.git")

import os
import sys
Expand All @@ -29,8 +14,7 @@


from pangolin.utils.log_colours import green,cyan
from pangolin.utils import dependency_checks

from pangolin.utils import data_checks
from pangolin.utils import update


Expand Down Expand Up @@ -80,15 +64,15 @@ def main(sysargs = sys.argv[1:]):
d_group.add_argument("--update-data", action='store_true',dest="update_data", default=False, help="Automatically updates to latest release of constellations and pangolin-data, including the pangoLEARN model, UShER tree file and alias file (also pangolin-assignment if it has been installed using --add-assignment-cache), then exits.")
d_group.add_argument('--add-assignment-cache', action='store_true', dest="add_assignment_cache", default=False, help="Install the pangolin-assignment repository for use with --use-assignment-cache. This makes updates slower and makes pangolin slower for small numbers of input sequences but much faster for large numbers of input sequences.")
d_group.add_argument('--use-assignment-cache', action='store_true', dest="use_assignment_cache", default=False, help="Use assignment cache from optional pangolin-assignment repository. NOTE: the repository must be installed by --add-assignment-cache before using --use-assignment-cache.")
d_group.add_argument('-d', '--datadir', action='store',dest="datadir",help="Data directory minimally containing the pangoLEARN model, header files and UShER tree. Default: Installed pangolin-data package.")
d_group.add_argument('--use-old-datadir', action='store_true', default=False, help="Use the data from data directory even if older than data installed via Python packages. Default: False")
d_group.add_argument('-d', '--datadir', action='store',dest="datadir",help="Data directory to treat as an additional source of versions of pangolin-data, constellations and pangolin-assignent. Discovered versions will take precedence over environment-installed versions unless they are older than the environment-installed ones or unconditionally if --use-old-datadir is specified. Note: --add-assignment-cache and --update-data are respecting --datadir and will perform their operations there.")
d_group.add_argument('--use-old-datadir', action='store_true', default=False, help="Use the data from data directory even if older than environment-installed data packages. Default: False")
d_group.add_argument('--usher-tree', action='store', dest='usher_protobuf', help="UShER Mutation Annotated Tree protobuf file to use instead of default from pangolin-data repository or --datadir.")
d_group.add_argument('--assignment-cache', action='store', dest='assignment_cache', help="Cached precomputed assignment file to use instead of default from pangolin-assignment repository. Does not require installation of pangolin-assignment.")

m_group = parser.add_argument_group('Misc options')
m_group.add_argument("--aliases", action='store_true', default=False, help="Print Pango alias_key.json and exit.")
m_group.add_argument("-v","--version", action='version', version=f"pangolin {__version__}")
m_group.add_argument("-pv","--pangolin-data-version", action='version', version=f"pangolin-data {pangolin_data.__version__}",help="show version number of pangolin data files (UShER tree and pangoLEARN model files) and exit.")
m_group.add_argument("-pv","--pangolin-data-version", action='store_true', help="show version number of pangolin data files (UShER tree and pangoLEARN model files) and exit.")
m_group.add_argument("--all-versions", action='store_true',dest="all_versions", default=False, help="Print all tool, dependency, and data versions then exit.")
m_group.add_argument("--verbose",action="store_true",help="Print lots of stuff to screen")
m_group.add_argument("-t","--threads",action="store",default=1,type=int, help="Number of threads")
Expand All @@ -100,15 +84,27 @@ def main(sysargs = sys.argv[1:]):
else:
args = parser.parse_args(sysargs)

if args.datadir and args.update:
sys.stderr.write(cyan(f'Error: incompatible options --datadir and --update; use --update-data to update the --datadir\n'))
sys.exit(-1)

# Initialise config dict
config = setup_config_dict(cwd)
data_checks.check_install(config)
set_up_verbosity(config)

if args.usher:
sys.stderr.write(cyan(f"--usher is a pangolin v3 option and is deprecated in pangolin v4. UShER is now the default analysis mode. Use --analysis-mode to explicitly set mode.\n"))
# Parsing analysis mode flags to return one of 'usher' or 'pangolearn'
config[KEY_ANALYSIS_MODE] = set_up_analysis_mode(args.analysis_mode, config[KEY_ANALYSIS_MODE])
# add flag to config for whether to run scorpio
if args.skip_scorpio or not args.query:
# If scorpio analysis is disabled via command line flag or if there is
# no query to analyze, there is no need to fail if scorpio or
# constellations cannot be resolved.
config[KEY_SKIP_SCORPIO] = True

setup_data(args.datadir,config[KEY_ANALYSIS_MODE], config, args.use_old_datadir)
setup_data(args.datadir, config, args.use_old_datadir)

if args.add_assignment_cache:
update.install_pangolin_assignment(config[KEY_PANGOLIN_ASSIGNMENT_VERSION], args.datadir)
Expand All @@ -117,46 +113,44 @@ def main(sysargs = sys.argv[1:]):
version_dictionary = {'pangolin': __version__,
'pangolin-data': config[KEY_PANGOLIN_DATA_VERSION],
'constellations': config[KEY_CONSTELLATIONS_VERSION],
'scorpio': config[KEY_SCORPIO_VERSION]}
if config[KEY_PANGOLIN_ASSIGNMENT_VERSION] is not None:
version_dictionary['pangolin-assignment'] = config[KEY_PANGOLIN_ASSIGNMENT_VERSION]
'scorpio': config[KEY_SCORPIO_VERSION],
'pangolin-assignment': config[KEY_PANGOLIN_ASSIGNMENT_VERSION]}
update.update(version_dictionary)

if args.update_data:
version_dictionary = {'pangolin-data': config[KEY_PANGOLIN_DATA_VERSION],
'constellations': config[KEY_CONSTELLATIONS_VERSION]}
if config[KEY_PANGOLIN_ASSIGNMENT_VERSION] is not None:
version_dictionary['pangolin-assignment'] = config[KEY_PANGOLIN_ASSIGNMENT_VERSION]
'constellations': config[KEY_CONSTELLATIONS_VERSION],
'pangolin-assignment': config[KEY_PANGOLIN_ASSIGNMENT_VERSION]}
update.update(version_dictionary, args.datadir)

if args.pangolin_data_version:
print("pangolin-data", config[KEY_PANGOLIN_DATA_VERSION] or 'not installed')
sys.exit(0)

if args.all_versions:
print_versions_exit(config)

# install_pangolin_assignment doesn't exit so that --update/--update-data can be given at the
# same time (or a query file). If --add-assignment-cache is the only arg, exit without error.
if args.add_assignment_cache and not args.query:
sys.exit(0)

# add flag to config for whether to run scorpio
# everything below this point will require a resolved pangolin-data source
if config[KEY_PANGOLIN_DATA_VERSION] is None:
install_error("pangolin_data", "https://github.com/cov-lineages/pangolin-data.git")

config[KEY_DESIGNATION_CACHE],config[KEY_ALIAS_FILE] = data_checks.find_designation_cache_and_alias(config[KEY_DATADIR],DESIGNATION_CACHE_FILE,ALIAS_FILE)
if args.aliases:
print_alias_file_exit(config[KEY_ALIAS_FILE])

if args.skip_scorpio:
print(green(f"****\nPangolin skipping scorpio steps.\n****"))
config[KEY_SKIP_SCORPIO] = True

if args.expanded_lineage:
print(green(f"****\nAdding expanded lineage column to output.\n****"))
config[KEY_EXPANDED_LINEAGE] = True

# Parsing analysis mode flags to return one of 'usher' or 'pangolearn'
config[KEY_ANALYSIS_MODE] = set_up_analysis_mode(args.analysis_mode, config[KEY_ANALYSIS_MODE])

snakefile = get_snakefile(thisdir,config[KEY_ANALYSIS_MODE])

config[KEY_DESIGNATION_CACHE],config[KEY_ALIAS_FILE] = data_checks.find_designation_cache_and_alias(config[KEY_DATADIR],DESIGNATION_CACHE_FILE,ALIAS_FILE)
if args.aliases:
print_alias_file_exit(config[KEY_ALIAS_FILE])

if args.all_versions:
print_versions_exit(config)

# to enable not having to pass a query if running update
# by allowing query to accept 0 to many arguments

print(green(f"****\nPangolin running in {config[KEY_ANALYSIS_MODE]} mode.\n****"))
if config[KEY_ANALYSIS_MODE] == "scorpio":
Expand Down
8 changes: 2 additions & 6 deletions pangolin/scripts/preprocessing.smk
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ rule scorpio:
input:
fasta = rules.create_seq_hash.output.fasta,
params:
constellation_files = " ".join(config[KEY_CONSTELLATION_FILES]),
skip_scorpio = config[KEY_SKIP_SCORPIO]
output:
report = os.path.join(config[KEY_TEMPDIR],"VOC_report.scorpio.csv")
Expand All @@ -89,28 +88,25 @@ rule scorpio:
if params.skip_scorpio:
shell("touch {output.report:q}")
else:
shell("scorpio classify \
shell("python -m scorpio classify \
-i {input.fasta:q} \
-o {output.report:q} \
-t {workflow.cores} \
--output-counts \
--constellations {params.constellation_files} \
--pangolin \
--list-incompatible \
--long &> {log:q}")

rule get_constellations:
params:
constellation_files = " ".join(config[KEY_CONSTELLATION_FILES]),
skip_scorpio = config[KEY_SKIP_SCORPIO]
output:
list = os.path.join(config[KEY_TEMPDIR], "get_constellations.txt")
run:
if params.skip_scorpio:
shell("touch {output.list:q}")
else:
shell("scorpio list \
--constellations {params.constellation_files} \
shell("python -m scorpio list \
--pangolin > {output.list:q}")

rule sequence_qc:
Expand Down
7 changes: 2 additions & 5 deletions pangolin/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

KEY_EXPANDED_LINEAGE="expanded_lineage"

KEY_CONSTELLATION_FILES="constellation_files"
KEY_USHER_PB = "usher_pb"
KEY_PLEARN_MODEL = "plearn_model"
KEY_PLEARN_HEADER = "plearn_header"
Expand All @@ -38,7 +37,8 @@

KEY_PANGOLIN_DATA_VERSION="pangolin_data_version"
KEY_PANGOLIN_VERSION="pangolin_version"
KEY_CONSTELLATIONS_VERSION="constellation_version"
KEY_CONSTELLATIONS_VERSION="constellations_version"
KEY_CONSTELLATIONS_PATH="constellations_path"
KEY_SCORPIO_VERSION="scorpio_version"
KEY_PANGOLIN_ASSIGNMENT_VERSION="pangolin_assignment_version"
KEY_PANGOLIN_ASSIGNMENT_PATH="pangolin_assignment_path"
Expand Down Expand Up @@ -77,6 +77,3 @@
"lineageTree.pb":KEY_USHER_PB
}

# Dependencies
dependency_list = ["gofasta","minimap2","snakemake"]
module_list = ["Bio","sklearn","pandas","joblib","pangoLEARN","constellations"]
8 changes: 0 additions & 8 deletions pangolin/utils/data_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,13 +114,5 @@ def get_assignment_cache(cache_file, config):
sys.exit(-1)
return cache

def get_constellation_files(path):
constellation_files = []
for r, _, f in os.walk(path):
for fn in f:
if (r.endswith('/constellations') or r.endswith('/constellations/definitions')) and fn.endswith('.json'):
constellation_files.append(os.path.join(r, fn))
return constellation_files

# config={}
# check_install()
60 changes: 0 additions & 60 deletions pangolin/utils/dependency_checks.py

This file was deleted.

54 changes: 29 additions & 25 deletions pangolin/utils/initialising.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,27 @@
from pangolin.utils.data_checks import *
from pangolin import __version__

import pangolin_data
class PangolinAssignmentWrapper():
class PangolinDependencyStub():
__version__ = None
__path__ = [None]

try:
import pangolin_data
except ImportError:
pangolin_data = PangolinDependencyStub()
try:
import pangolin_assignment
except ImportError:
# if we can't import the module, replace it with a stub that exposes the attributes (__version__, __path__) read later
pangolin_assignment = PangolinAssignmentWrapper()
import scorpio
import constellations
pangolin_assignment = PangolinDependencyStub()
try:
import scorpio
except ImportError:
scorpio = PangolinDependencyStub()
try:
import constellations
except ImportError:
constellations = PangolinDependencyStub()

def setup_config_dict(cwd):
default_dict = {
Expand All @@ -41,8 +51,6 @@ def setup_config_dict(cwd):

KEY_TEMPDIR:None,
KEY_NO_TEMP:False,

KEY_DATADIR:None,

KEY_MAXAMBIG: 0.3,
KEY_TRIM_START:265, # where to pad to using datafunk
Expand All @@ -54,14 +62,14 @@ def setup_config_dict(cwd):

KEY_EXPANDED_LINEAGE: False,

KEY_CONSTELLATION_FILES: [],

KEY_INPUT_COMPRESSION_TYPE: "plaintext",

KEY_PANGOLIN_VERSION: __version__,
KEY_PANGOLIN_DATA_VERSION: pangolin_data.__version__,
KEY_DATADIR: pangolin_data.__path__[0],
KEY_SCORPIO_VERSION: scorpio.__version__,
KEY_CONSTELLATIONS_VERSION: constellations.__version__,
KEY_CONSTELLATIONS_PATH: constellations.__path__[0],
KEY_PANGOLIN_ASSIGNMENT_VERSION: pangolin_assignment.__version__,
KEY_PANGOLIN_ASSIGNMENT_PATH: pangolin_assignment.__path__[0],

Expand Down Expand Up @@ -128,15 +136,8 @@ def version_from_init(init_file):
break
return version

def setup_data(datadir_arg, analysis_mode, config, use_old_data):
def setup_data(datadir_arg, config, use_old_data):
datadir = check_datadir(datadir_arg)

config[KEY_PANGOLIN_DATA_VERSION] = pangolin_data.__version__
config[KEY_DATADIR] = pangolin_data.__path__[0]
config[KEY_CONSTELLATIONS_VERSION] = constellations.__version__
config[KEY_CONSTELLATION_FILES] = get_constellation_files(constellations.__path__[0])
config[KEY_PANGOLIN_ASSIGNMENT_VERSION] = pangolin_assignment.__version__
config[KEY_PANGOLIN_ASSIGNMENT_PATH] = pangolin_assignment.__path__[0]

if datadir:
for module_name in ('constellations', 'pangolin_data', 'pangolin_assignment'):
Expand All @@ -155,10 +156,16 @@ def setup_data(datadir_arg, analysis_mode, config, use_old_data):
config[KEY_PANGOLIN_ASSIGNMENT_PATH] = os.path.join(datadir, r)
elif module_name == "constellations":
config[KEY_CONSTELLATIONS_VERSION] = version
config[KEY_CONSTELLATION_FILES] = get_constellation_files(r)
config[KEY_CONSTELLATIONS_PATH] = os.path.join(datadir, r)
else:
sys.stderr.write(cyan(f"Warning: Ignoring {module_name} in specified datadir {datadir} - it contains {module_name} with older ({version}) than those installed ({current_version})\n"))

if not config[KEY_SKIP_SCORPIO]:
if scorpio.__version__ is None:
install_error("scorpio", "https://github.com/cov-lineages/scorpio.git")
if config[KEY_CONSTELLATIONS_VERSION] is None:
install_error("constellations", "https://github.com/cov-lineages/constellations.git")

def parse_qc_thresholds(maxambig, minlen, reference_fasta, config):

if maxambig:
Expand Down Expand Up @@ -215,13 +222,10 @@ def print_conda_version(pkg_list):
sys.stderr.write(cyan(f"version not found in output of 'conda list {pkg}':\n{output}\n"))

def print_versions_exit(config):
print(f"pangolin: {config[KEY_PANGOLIN_VERSION]}\n"
f"pangolin-data: {config[KEY_PANGOLIN_DATA_VERSION]}\n"
f"constellations: {config[KEY_CONSTELLATIONS_VERSION]}\n"
f"scorpio: {config[KEY_SCORPIO_VERSION]}")
# Report pangolin_assignment version if it is installed, otherwise ignore
if config[KEY_PANGOLIN_ASSIGNMENT_VERSION] is not None:
print(f"pangolin-assignment: {config[KEY_PANGOLIN_ASSIGNMENT_VERSION]}")
packages = ["pangolin", "pangolin-data", "constellations", "scorpio", "pangolin-assignment"]
version_keys = [KEY_PANGOLIN_VERSION, KEY_PANGOLIN_DATA_VERSION, KEY_CONSTELLATIONS_VERSION, KEY_SCORPIO_VERSION, KEY_PANGOLIN_ASSIGNMENT_VERSION]
for pkg, version_key in zip(packages, version_keys):
print(pkg, config[version_key] or 'not installed', sep=': ')
# Print versions of other important tools used by pangolin
print_conda_version(['usher', 'ucsc-fatovcf', 'gofasta', 'minimap2'])
sys.exit(0)
Expand Down
Loading