Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

{bio}[foss/2022a] FunGAP v1.1.1 w/ Python 3.10.4 #17652

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions easybuild/easyconfigs/f/FunGAP/FunGAP-1.1.1-foss-2022a.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# EasyBuild easyconfig for FunGAP 1.1.1 built on top of the foss/2022a toolchain.
# The sources are unpacked as-is (Tarball easyblock), one extra Python package
# is installed as an extension, and set_dependencies.py is run after the
# installation to generate the FunGAP configuration.
easyblock = 'Tarball'

name = 'FunGAP'
version = '1.1.1'

homepage = 'https://github.com/CompSynBioLab-KoreaUniv/FunGAP'
description = "Fungal Genome Annotation Pipeline using evidence-based gene model evaluation."

toolchain = {'name': 'foss', 'version': '2022a'}

# The upstream v1.1.1 tag still points to a commit belonging to v1.1.0, so the
# archive is downloaded by commit hash and stored under the regular tarball name.
_source_commit = '924f3ba080f98cbf181b0b21601e095619479ce6'

source_urls = ['https://github.com/CompSynBioLab-KoreaUniv/FunGAP/archive']
sources = [{
    'download_filename': _source_commit + '.tar.gz',
    'filename': SOURCE_TAR_GZ,
}]
patches = [
    'FunGAP-%(version)s_fix-snap-detection.patch',
    'FunGAP-%(version)s_relax-dependency-checks.patch',
    'FunGAP-%(version)s_replace-deprecated-pa-repeatmodeler.patch',
    'FunGAP-%(version)s_fix-maker-exes.patch',
    'FunGAP-%(version)s_fix-augustus-calls-in-runbreaker.patch',
]
# One entry per source/patch, in the same order as listed above.
checksums = [
    {'FunGAP-1.1.1.tar.gz':
     '3d827c4b11452afdd51b71766e0e3193b7efad31db4536606115f2cac0b964c8'},
    {'FunGAP-1.1.1_fix-snap-detection.patch':
     'f782224ce186e1e2d8953898122b79d616f8a749b00ec662ec5a3fa1903550fa'},
    {'FunGAP-1.1.1_relax-dependency-checks.patch':
     '73f9ae2a20cf03c34f852642a28d3b7e9858250f653ff1e711c3c9a56ae2fd77'},
    {'FunGAP-1.1.1_replace-deprecated-pa-repeatmodeler.patch':
     'c528aab74a070d6eedc9ff2097b6149e8d36759b9ff93c46302994179a38a774'},
    {'FunGAP-1.1.1_fix-maker-exes.patch':
     'de344cf45dad047ab46a17462e19ce0789a89071c1484cbdedd9d898f9601bd0'},
    {'FunGAP-1.1.1_fix-augustus-calls-in-runbreaker.patch':
     '150b6f79f95d0c8abece42231c5dcedf8610a811db9cc63ac6cc047f0dcfdc64'},
]

dependencies = [
    ('Python', '3.10.4'),
    ('Perl', '5.34.1'),
    ('AUGUSTUS', '3.5.0'),
    ('BamTools', '2.5.2'),
    ('bcbio-gff', '0.7.0'),
    ('BRAKER', '2.1.6'),
    ('BUSCO', '5.4.5'),
    ('HISAT2', '2.2.1'),
    ('MAKER', '3.01.04'),
    ('matplotlib', '3.5.2'),
    ('networkx', '2.8.4'),
    ('PfamScan', '1.6'),
    ('RepeatModeler', '2.0.4'),
    ('SAMtools', '1.16.1'),
    ('SNAP-HMM', '20221022'),
    ('Trinity', '2.15.1'),
    ('wget', '1.21.3'),
]

# Extensions are regular Python packages fetched from PyPI and installed with pip.
exts_defaultclass = 'PythonPackage'
exts_default_options = {
    'source_urls': [PYPI_SOURCE],
    'download_dep_fail': True,
    'use_pip': True,
}

exts_list = [
    ('markdown2', '2.4.8', {
        'checksums': ['90475aca3d9c8e7df6d70c51de5bbbe9edf7fcf6a380bd1044d321500f5445da'],
    }),
]

# FunGAP requires the Pfam database (280 MB to download, 1.5 GB once unpacked).
# If your system already provides it, point _pfam_db_dir to that location and
# the download below is skipped.
_pfam_db_dir = '%(installdir)s/db'
_pfam_release_url = 'https://ftp.ebi.ac.uk/pub/databases/Pfam/current_release'
_pfam_archives = ['Pfam-A.hmm.gz', 'Pfam-A.hmm.dat.gz', 'active_site.dat.gz']
# the presence of this file decides whether the database has to be fetched
_pfam_hmm = _pfam_db_dir + '/Pfam-A.hmm'

# shell steps that download, unpack and index the Pfam database
_pfam_fetch_steps = [
    "mkdir -p " + _pfam_db_dir,
    "wget -P %s %s/{%s}" % (_pfam_db_dir, _pfam_release_url, ','.join(_pfam_archives)),
    "gzip -d " + _pfam_db_dir + "/*.gz",
    "hmmpress " + _pfam_hmm,
]

# set_dependencies.py writes the FunGAP configuration file and runs some quick tests
_set_dependencies_cmd = " ".join([
    "cd %(installdir)s && ./set_dependencies.py",
    "--pfam_db_path " + _pfam_db_dir,
    "--genemark_path $EBROOTGENEMARKMINET",
    "--maker_path $EBROOTMAKER/bin",
    "--snap_path $EBROOTSNAPMINHMM/bin",
])

postinstallcmds = [
    # fetch the Pfam database only when it is not there yet
    "if [ ! -f %s ]; then %s; fi" % (_pfam_hmm, " && ".join(_pfam_fetch_steps)),
    _set_dependencies_cmd,
]

sanity_check_paths = {
    'files': ['fungap.py'],
    'dirs': ['lib/python%(pyshortver)s/site-packages'],
}

sanity_check_commands = [
    ('fungap.py', '--help'),
]

# FunGAP scripts sit in the top directory of the installation, so that directory
# is exposed in both PATH and PYTHONPATH.
modextrapaths = {
    'PATH': '',
    'PYTHONPATH': ['', 'lib/python%(pyshortver)s/site-packages'],
}

modextravars = {
    'FUNGAP_DIR': '%(installdir)s',
}

moduleclass = 'bio'
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
Set paths to AUGUSTUS installation preferably from its environment variables
Fix location of getAnnoFastaFromJoingenes.py script from AUGUSTUS
see https://github.com/CompSynBioLab-KoreaUniv/FunGAP/pull/98
author: Alex Domingo (Vrije Universirteit Brussel)
--- run_braker.py.orig 2023-04-03 12:07:35.178272000 +0200
+++ run_braker.py 2023-04-03 12:24:48.713953000 +0200
@@ -124,8 +124,19 @@
logger_time.debug('START: BRAKER')

if not os.path.exists(gff3_braker):
- augustus_config_path = os.path.join(
- os.path.dirname(D_CONF['AUGUSTUS_PATH']), '../config')
+ augustus_bin_path = os.environ[
+ 'AUGUSTUS_BIN_PATH'
+ ] or os.path.dirname(D_CONF['AUGUSTUS_PATH'])
+ augustus_config_path = os.environ[
+ 'AUGUSTUS_CONFIG_PATH'
+ ] or os.path.join(
+ os.path.dirname(D_CONF['AUGUSTUS_PATH']), '../config'
+ )
+ augustus_scripts_path = os.environ[
+ 'AUGUSTUS_SCRIPTS_PATH'
+ ] or os.path.join(
+ os.path.dirname(D_CONF['AUGUSTUS_PATH']), '../scripts'
+ )
config_species = os.path.join(
augustus_config_path, 'species', prefix)
species = prefix
@@ -140,7 +151,6 @@
bamtools_path = os.path.dirname(D_CONF['BAMTOOLS_PATH'])
genemark_path = os.path.dirname(D_CONF['GENEMARK_PATH'])
samtools_path = os.path.dirname(D_CONF['SAMTOOLS_PATH'])
- augustus_scripts_path = os.path.dirname(D_CONF['AUGUSTUS_PATH'])
working_dir = os.path.join(output_dir, prefix)
if not os.path.exists(working_dir):
os.mkdir(working_dir)
@@ -153,7 +163,7 @@
braker_bin, fungus_flag, num_cores, adjusted_assembly,
bam_file, species, augustus_config_path, bamtools_path,
genemark_path, samtools_path, working_dir,
- translation_table, augustus_scripts_path, log_braker))
+ translation_table, augustus_bin_path, log_braker))
logger_txt.debug('[Run] %s', command1)
os.system(command1)

@@ -165,7 +175,7 @@

augustus_dir = os.path.dirname(D_CONF['AUGUSTUS_PATH'])
get_anno_script = os.path.join(
- augustus_dir, 'getAnnoFastaFromJoingenes.py')
+ augustus_scripts_path, 'getAnnoFastaFromJoingenes.py')
if not os.path.exists(get_anno_script):
get_anno_script = os.path.join(
augustus_dir,
20 changes: 20 additions & 0 deletions easybuild/easyconfigs/f/FunGAP/FunGAP-1.1.1_fix-maker-exes.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
The paths to MAKER executables set by MAKER are already correct, avoid
tampering with them.
author: Alex Domingo (Vrije Universiteit Brussel)
--- run_maker.py.orig 2023-03-21 09:19:05.057679000 +0100
+++ run_maker.py 2023-03-21 09:19:49.143408491 +0100
@@ -463,14 +463,6 @@
replace('maker_opts.ctl', 'protein_pass=0', 'protein_pass=1')
replace('maker_opts.ctl', 'rm_pass=0', 'rm_pass=1')

- # Program paths
- for program in [
- 'makeblastdb', 'blastn', 'blastx', 'tblastx', 'RepeatMasker',
- 'exonerate', 'snap', 'augustus', 'tRNAscan-SE', 'snoscan']:
- replace('maker_exe.ctl', '{}='.format(program), '{}={}'.format(
- program, os.path.join(os.path.dirname(maker_bin), program)
- ))
-
# Last run, keep_preds=1
if version == '4':
replace('maker_opts.ctl', 'keep_preds=0', 'keep_preds=1')
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
Fix detection of fathom, forge and hmm-assembler.pl, which are not part of MAKER but SNAP
see https://github.com/CompSynBioLab-KoreaUniv/FunGAP/pull/97
author: Alex Domingo (Vrije Universiteit Brussel)
--- set_dependencies.py.orig 2023-03-16 15:27:00.506475000 +0100
+++ set_dependencies.py 2023-03-16 15:31:31.102674857 +0100
@@ -44,6 +44,10 @@
help='Maker bin path'
)
parser.add_argument(
+ '-s', '--snap_path', nargs=1, required=True,
+ help='SNAP-HMM bin path'
+ )
+ parser.add_argument(
'-r', '--with_repeat_modeler', nargs='?', default='',
help='User-defined RepeatModeler bin path'
)
@@ -76,6 +80,7 @@
pfam_db_path = os.path.abspath(args.pfam_db_path[0])
i_genemark_path = os.path.abspath(args.genemark_path[0])
i_maker_path = os.path.abspath(args.maker_path[0])
+ i_snap_path = os.path.abspath(args.snap_path[0])
if args.with_repeat_modeler:
with_repeat_modeler = os.path.abspath(args.with_repeat_modeler)
else:
@@ -104,8 +109,9 @@
pfam_scan_path, blastp_path, blastn_path, blastx_path,
makeblastdb_path, samtools_path, bamtools_path, augustus_path
) = get_path(
- i_genemark_path, i_maker_path, with_repeat_modeler, with_augustus,
- with_hisat2, with_trinity, with_braker, with_busco, with_pfam_scan
+ i_genemark_path, i_maker_path, i_snap_path, with_repeat_modeler,
+ with_augustus, with_hisat2, with_trinity, with_braker, with_busco,
+ with_pfam_scan
)
check_working(
genemark_path, gmhmme3_path, probuild_path, build_database_path,
@@ -159,8 +165,9 @@


def get_path(
- i_genemark_path, i_maker_path, with_repeat_modeler, with_augustus,
- with_hisat2, with_trinity, with_braker, with_busco, with_pfam_scan):
+ i_genemark_path, i_maker_path, i_snap_path, with_repeat_modeler,
+ with_augustus, with_hisat2, with_trinity, with_braker, with_busco,
+ with_pfam_scan):
'''Get path'''
print('\n** Checking the installed locations of dependencies **\n')

@@ -198,9 +205,9 @@
gff3_merge_path = check_binary('Maker', i_maker_path, 'gff3_merge')
fasta_merge_path = check_binary('Maker', i_maker_path, 'fasta_merge')
maker2zff_path = check_binary('Maker', i_maker_path, 'maker2zff')
- fathom_path = check_binary('Snap', i_maker_path, 'fathom')
- forge_path = check_binary('Snap', i_maker_path, 'forge')
- hmm_assembler_path = check_binary('Snap', i_maker_path, 'hmm-assembler.pl')
+ fathom_path = check_binary('Snap', i_snap_path, 'fathom')
+ forge_path = check_binary('Snap', i_snap_path, 'forge')
+ hmm_assembler_path = check_binary('Snap', i_snap_path, 'hmm-assembler.pl')
build_database_path = check_binary(
'RepeatModeler (BuildDatabase)', with_repeat_modeler, 'BuildDatabase',
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
Disable too strict check on version of AUGUSTUS and the GeneMark key in the
home directory of the user
author: Alex Domingo (Vrije Universiteit Brusel)
--- set_dependencies.py.orig 2023-03-16 15:50:03.802522000 +0100
+++ set_dependencies.py 2023-03-16 15:50:28.562603082 +0100
@@ -284,15 +284,15 @@
check_working_internal(samtools_path, [samtools_path, '--help'])
check_working_internal(bamtools_path, [bamtools_path, '--help'])
check_working_internal(augustus_path, [augustus_path, '--help'])
- check_augustus_version(augustus_path)
+ # check_augustus_version(augustus_path)

- # For GeneMark, check the .gm_key
- home_path = os.path.expanduser('~')
- if not os.path.exists(os.path.join(home_path, '.gm_key')):
- sys.exit(
- '\n[ERROR] You do not have .gm_key in your home directory.\n'
- 'Check https://wiki.gacrc.uga.edu/wiki/GeneMark'
- )
+ # # For GeneMark, check the .gm_key
+ # home_path = os.path.expanduser('~')
+ # if not os.path.exists(os.path.join(home_path, '.gm_key')):
+ # sys.exit(
+ # '\n[ERROR] You do not have .gm_key in your home directory.\n'
+ # 'Check https://wiki.gacrc.uga.edu/wiki/GeneMark'
+ # )

def check_augustus_version(augustus_path):
'''Check Augustus version 3.3.3'''
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
Replace deprecated -pa option in newer versions of RepeatModeler with -threads
author: Alex Domingo (Vrije Universiteit Brussel)
--- run_repeat_modeler.py.orig 2023-03-17 09:23:21.857374000 +0100
+++ run_repeat_modeler.py 2023-03-17 09:24:17.655942000 +0100
@@ -73,7 +73,7 @@

# BuildDatabase -name Choanephora_cucurbitarum
# ../Choanephora_cucurbitarum_assembly.fna
- # RepeatModeler -database Choanephora_cucurbitarum -pa 25
+ # RepeatModeler -database Choanephora_cucurbitarum -threads 25

# Get repeat model
repeat_lib = os.path.join(output_dir, '*', 'consensi.fa.classified')
@@ -89,7 +89,7 @@
os.system(command1)

log_file2 = os.path.join(log_dir, 'repeat_modeler.log')
- command2 = '{} -database {} -pa {} > {} 2>&1'.format(
+ command2 = '{} -database {} -threads {} > {} 2>&1'.format(
repeatmodeler_bin, genome_assembly, num_cores, log_file2
)
logger_txt.debug('[Run] %s', command2)