- Overview
- Contributors
- Raw Data Processing
- Homolog Inference
- Ortholog Inference
- Alignment construction
- Phylogeny Construction
- Introgression Analysis
- Ancestral Segregating Allele Analysis
- Adaptive Evolution Analysis
- Raw scripts/Pipeline for the "Jaltomato Phylogenomics" Project.
- Some scripts were written by Ya Yang for her study (https://bitbucket.org/yangya/phylogenomic_dataset_construction).
- Scripts associated with MVF-format data processing can be found in mvftools (https://github.com/jbpease/mvftools).
- Some scripts could/should be split into multiple runs (check each script before submitting it).
- The paper 💥 associated with this project has been accepted by Molecular Ecology (bioRxiv 201376)
- Still being updated!
qsub trim.sh
qsub clip5end.sh
qsub FastaQC.sh
qsub trinity.sh
qsub transdecoder.sh
for file in *_dir; do cp $file/longest_orfs.cds outDIR/$file'.cds'; done
python fix_names_from_transdecoder.py <DIR> <DIR>
cat *_NR.fa *_RP.fa > *_cds.fa
qsub cd-hit-est.sh
qsub blastn.sh
cat *blastn > all.rawblast
python blast_to_mcl.py all.rawblast <hit_fraction_cutoff>
mcl all.rawblast.hit-frac0.4.minusLogEvalue --abc -te 5 -tf 'gq(10)' -I 2.5 -o hit-frac0.4_I2.5_e10
python write_fasta_files_from_mcl.py <fasta files> <mcl_outfile> <minimal_ingroup_taxa> <outDIR>
qsub mafft.sh
qsub phyutility.sh
qsub fasttree.sh
python cut_long_branches_iter.py <inDIR> <outDIR>
qsub mafft.sh
qsub phyutility.sh
qsub raxml.sh
Cut long internal branches, trim spurious tips and mask monophyletic/paraphyletic tips of the same taxon
python cut_long_internal_branches.py <inDIR> <internal_branch_length_cutoff> <minimal_taxa> <outDIR>
python trim_tips.py <treDIR> <outDIR> <relative_cutoff> <absolute_cutoff1> <absolute_cutoff2>
python mask_tips_by_taxonID_transcripts.py <treDIR> <aln-clnDIR> <outDIR>
python prune_paralogs_MI.py <homologDIR> <tree_ending> <relative_tip_cutoff> <absolute_tip_cutoff> <minimal_taxa> <outDIR>
python write_ortholog_fasta_files.py <fasta file with all seqs> <ortholog tree DIR> <outDIR> <MIN_TAXA>
python cluster_gene_ID.py <inDIR> <treDIR> <outDIR>
python CapsellaOrtholog.py <inDIR> Tomato_Capsella.txt Capsicum.annuum.L_Zunla-1_v2.0_CDS.fa <outDIR>
python directory_subpackage.py <inDIR> <num_subdir> .fa
qsub guidance.sh
for file in Solyc*; do cp $file/MSA.PRANK.Without_low_SP_Col.With_Names outDIR/$file; done
python find_unprocessed_files.py <processedDIR> <originalDIR> <unprocessedDIR>
qsub mask_bySW.sh
python orf_aln_process.py <inDIR> <outDIR> -s Capana -d 15
python3.3 fasta2mvf.py --fasta alignments_Dir/* --out genes_mvf --contigbyfile --overwrite
python3.3 mvf_analyze_dna.py --mvf genes_mvf --out genetic_dist PairwiseDistanceWindow
qsub raxml_concatenate.sh
module load phylip; consense
raxmlHPC -L MRE -z genetrees.tre -m GTRCAT -n T1
qsub astral.sh
python seqformat_converter.py <inDIR> <outDIR> .phy .nex
qsub bucky.sh
Rscript phylo_construct.R
python3.3 fasta2mvf.py --fasta <concatenated_fasta> --out transcriptome --overwrite
python ABBA_trio.py
qsub introgression_trios.sh
python ABBA_parse.py -mvf MVF_FILE -test pairwise
sh speciesID.sh
python3.3 mvf_analyze_dna.py --mvf transcriptome --out SIN_CAL_DAR_PRO --samples JA0702 JA0711 JA0694 JA0456 Solyc --windowsize 6201996 PatternCount
python dfoil.py --out myfile --infile SIN_CAL_DAR_PRO --pvalue 0.00001
qsub mapping.sh
qsub snp_call.sh
python mvf_join.py --mvf SL2.50ch00.mvf SL2.50ch01.mvf SL2.50ch02.mvf SL2.50ch03.mvf SL2.50ch04.mvf SL2.50ch05.mvf SL2.50ch06.mvf SL2.50ch07.mvf SL2.50ch08.mvf SL2.50ch09.mvf SL2.50ch10.mvf SL2.50ch11.mvf SL2.50ch12.mvf --out combined.mvf
python ancestral_variation.py -i combined.mvf -t species_hetero
python ancestral_variation.py -i combined.mvf -t shared_hetero
python ancestral_variation.py -i combined.mvf -t shared_snp
qsub ILS_trios.sh
python orf_aln_process.py -i <inDIR> -o <outDIR> -s JA0010
grep -lir 'Capana' ./ | xargs mv -t <outDIR>
python seqformat_converter.py <inDIR> <outDIR> .fa .phy
sh edit_phy2.sh
python codemlScript.py <outDIR> <codeml_build> <treeFile>
qsub paml.sh
find */rub -empty -type f
python SWAMP.py -i <inDIR> -b <branchcodes.txt> -t 5 -w 15 -m 50
for file in Solyc*; do cp inDIR/*masked.phy outDIR; done
python orf_aln_process.py -i <inDIR> -o <outDIR> -s seqname -d 14
python3.3 fasta2mvf.py --fasta inDIR/* --out outDIR/Jalt_ortho_dna --contigbyfile --overwrite
python3.3 mvf_translate.py --mvf Jalt_ortho_dna --out Jalt_ortho_codon
qsub mvf_paml.sh
python CombinedPAML.py <NS_out> <Geneoutput> GeneFunction.txt > PAML_final.txt
python3.3 mvf_analyze_codon.py GroupUniqueAlleleWindow --mvf Jalt_noSolyc_codon --out Jalt_nectar --allelegroups RED:JA0432,JA0608,JA0719,JA0726,JA0816,JA0711,JA0798 OTHER:JA0456,JA0701,JA0694,JA0450,JA0723,JA0702 --windowsize -1 --uselabels --speciesgroups PRO:JA0456 REP:JA0701 DAR:JA0694 AUR:JA0450 UMB:JA0432 BIF:JA0608 SIN:JA0702 DEN:JA0719 YUN:JA0723 AIJ:JA0726 INC:JA0816 CAL:JA0711 QUI:JA0798 --branchlrt Geneoutput_nectar --pamltmp PAMLtemp_nectar --startcontig 0 --endcontig 0 --target JA0432 JA0608 JA0719 JA0726 JA0816 JA0711 JA0798 --targetspec 8 --raxmlpath raxmlHPC --allsampletree
qsub ms_sim.sh