diff --git a/README.md b/README.md index 9f17d11..690a55f 100644 --- a/README.md +++ b/README.md @@ -33,13 +33,13 @@ The workshop will take place in the Randall Rollins Building (RR 201) Slides: [here](https://github.com/JuliaPalacios/SISMID_EvolutionaryDynamics/blob/master/Lectures/2023-07-12_Introduction.pptx?raw=true)
3:30pm - 5:00pm: Lecturer: Nicola. *Getting BEAST2 to run with SARS2*
-Slides: [here](https://github.com/JuliaPalacios/SISMID_EvolutionaryDynamics/blob/master/Lectures/2023-07-12_beast_introduction.pptx?raw=true)
+Slides: [here](https://github.com/JuliaPalacios/SISMID_EvolutionaryDynamics/blob/master/Lectures/2024-07-24_beast_introduction.pptx?raw=true)
Tutorial: [Intro to BEAST2 tutorial](https://github.com/JuliaPalacios/SISMID_EvolutionaryDynamics/blob/master/Tutorials/Introduction-to-BEAST2.zip?raw=true)
**Thursday, July 25: Modeling evolution and tree priors** 8:30am - 10:00am Lecturer: Nicola *Modelling the evolution of genomes through time*
- Slides: [here](https://github.com/JuliaPalacios/SISMID_EvolutionaryDynamics/blob/master/Lectures/2023-07-13-sismid_evol_nfm.pptx?raw=true)
+ Slides: [here](https://github.com/JuliaPalacios/SISMID_EvolutionaryDynamics/blob/master/Lectures/2024-07-25-sismid_evol_nfm.pptx?raw=true)
Tutorial: [BModelTest](https://github.com/JuliaPalacios/SISMID_EvolutionaryDynamics/blob/master/Tutorials/BModelTest.zip?raw=true)
10:30am - 12:00am: Lecturer: Julia *Coalescent and Birth death tree priors*
@@ -60,7 +60,7 @@ Day 2 additional tutorial suggestions: [Model adequacy](https://taming-the-beast **Friday, July 26: Structured populations, networks, and XML** 8:30am - 10:00am: Lecturer: Nicola *Structured populations*
- Slides: [here](https://github.com/JuliaPalacios/SISMID_EvolutionaryDynamics/blob/master/Lectures/2023-07-14-structured_nfm.pptx?raw=true)
+ Slides: [here](https://github.com/JuliaPalacios/SISMID_EvolutionaryDynamics/blob/master/Lectures/2024-07-26-structured_nfm.pptx?raw=true)
Tutorial: [MASCOT](https://github.com/JuliaPalacios/SISMID_EvolutionaryDynamics/blob/master/Tutorials/Mascot-Tutorial.zip?raw=true)
10:30am - 11:00am: Lecturer: Nicola *Multi-type birth-death models*
@@ -71,7 +71,7 @@ Day 2 additional tutorial suggestions: [Model adequacy](https://taming-the-beast Mini Tutorial [ggtree](https://github.com/JuliaPalacios/SISMID_EvolutionaryDynamics/blob/master/Tutorials/mascot_ggtree.zip?raw=true) 11:40am - 12:00am: Lecturer: Nicola and Julia *How does phylodynamics relate to transmission dynamics?*
- Slides: [here](https://github.com/JuliaPalacios/SISMID_EvolutionaryDynamics/blob/master/Lectures/2024-07-26-relation_phylodynamics_epidemiology.pptx)
+ Slides: [here](https://github.com/JuliaPalacios/SISMID_EvolutionaryDynamics/blob/master/Lectures/2024-07-26-relation_phylodynamics_epidemiology.pptx?raw=true)
1:30pm - 3:00pm: Lecturer: Nicola: *Recombination and Reassortment*
Slides: [here](https://github.com/JuliaPalacios/SISMID_EvolutionaryDynamics/blob/master/Lectures/2023-07-14-sismid_network_nfm.pptx?raw=true)
import os
import re
import sys
from optparse import OptionParser
from subprocess import call

try:
    import yaml
except ImportError:
    # PyYAML is only needed for the tutorial header; the text helpers below
    # stay importable without it and the error is reported when it is needed.
    yaml = None

################################################################################################################################
# Markdown to Latex parser
#
# This script parses tutorials written in Markdown to Latex. It assumes that Pandoc is installed.
# Although Pandoc does most of the conversion, the script does some pre- and post-processing to better convert
# difficult tags and things specific to the site's Markdown tutorials (figures, references etc.).
#
# The script can also compile the resulting .tex file into .pdf and remove all temporary files produced.
#
# WARNING
# This is NOT a proper recursive parser. It is just a hastily written script that helps to convert files. It
# was a trade-off between the time it takes to convert things manually and the time it would take to write a
# real parser. After running the script the file should always still be inspected by hand!
#
# Details
#   - Tables are not converted and should be done by hand
#   - Some unicode characters are not correctly converted (this is an issue with pdflatex)
#   - It is assumed that images are in html <figure> tags in the Markdown tutorial
#   - The only image sizing that is recognized is width as a percentage of the panel
#   - If a different image size or no image size is specified the image is displayed at its actual size,
#     unless it is bigger than the page margins, in which case the image will be resized to fit within the
#     margins.
#   - Horizontal lines will be removed and replaced with new pages. This could be an issue when code blocks
#     contain series of dashes (----) as these will also be removed.
#
# TODO:
#   - Contents of figure captions are not always parsed correctly (if it contains math etc.)
#
################################################################################################################################

signature = "% This file was created (at least in part) by the script ParseMdtoLatex by Louis du Plessis\n" \
          + "% (Available from https://github.com/taming-the-beast)\n\n"

# Liquid tag: {% (tag) (content) %}, can be multiline
_LIQUID_TAG = re.compile(r'{%\s(.*?)\s(.*?)%}', re.DOTALL)

# NOTE(review): the html patterns below were reconstructed from how their
# groups are used (label id, <img> attributes, caption text) -- confirm them
# against the figure markup actually used in the tutorials.
_FIGURE_FULL = re.compile(
    r'<figure>.*?<a\s+id="(.*?)"\s*>.*?</a>.*?<img\s+(.*?)>.*?'
    r'<figcaption>(.*?)</figcaption>.*?</figure>', re.DOTALL)
_FIGURE_BLOCK = re.compile(r'<figure>(.*?)</figure>', re.DOTALL)
_FIGURE_LABEL = re.compile(r'<a\s+id="(.*?)"\s*>.*?</a>', re.DOTALL)
_FIGURE_IMAGE = re.compile(r'<img\s+(.*?)>', re.DOTALL)
_FIGURE_CAPTION = re.compile(r'<figcaption>(.*?)</figcaption>', re.DOTALL)

# name="value" pairs inside an <img> tag (values may contain spaces)
_IMAGE_ATTRIBUTE = re.compile(r'([\w:-]+)\s*=\s*"(.*?)"', re.DOTALL)
# style="width:NN%;" is the only sizing that is understood
_WIDTH_PERCENT = re.compile(r'^\s*width\s*:\s*(\d+(?:\.\d+)?)\s*%\s*;?\s*$', re.IGNORECASE)
_CAPTION_PREFIX = re.compile(r'[fF]igure\s\d+:')
_DEFAULT_SCALE = "[max width=\\textwidth, max height=0.9\\textheight]"

# [Figure (number)](#(label))
_FIGURE_REF = re.compile(r'\[Figure\s\d+\]\(#(.+?)\)', re.DOTALL)
# Blank line followed by three or more dashes
_HORIZONTAL_LINE = re.compile(r'\n\n-{3,}')
# \lstinline!$(math)$!
_INLINE_MATH = re.compile(r'\\lstinline\!\$(.+?)\$\!', re.DOTALL)
# \^{}(superscript) and \^{}superscript; the lookahead keeps the character
# that follows in place so that adjacent superscripts are all converted.
_SUPERSCRIPTS = (
    re.compile(r'\\\^\{\}\((.+?)\)(?=[^\)])', re.DOTALL),
    re.compile(r'\\\^\{\}(\w+?)(?=\W)', re.DOTALL),
)
# \section{Relevant References}\label{relevant-references}, any capitalisation
_REFERENCES_HEADING = re.compile(
    r'\\section\{Relevant References\}\\label\{relevant-references\}',
    re.DOTALL | re.IGNORECASE)


def getPandocCall(inputfile, outputfile, template=""):
    """Return the pandoc command line (as an argument list) for the conversion.

    inputfile  -- Markdown file pandoc should read
    outputfile -- file pandoc should write
    template   -- optional pandoc template; "" uses the pandoc default
    """
    command = ["pandoc", "-o", outputfile]

    # Standalone file
    command.append("-s")
    if template != "":
        command.extend(["--template", template])

    # Use listings package for code blocks
    command.append("--listings")

    # Input file
    command.append(inputfile)

    return command
#


def getYamlHeader(text, title="Untitled tutorial", subtitle="", version="2.x"):
    """Split the YAML front matter from the tutorial text.

    text     -- complete Markdown source of the tutorial
    title    -- overrides the header title unless left at "Untitled tutorial"
    subtitle -- overrides the header subtitle unless empty
    version  -- stored as "beastversion" if the header does not define it

    Returns (header, body): the header as a dict and the text that follows the
    front matter. A text without front matter yields an empty header and is
    returned unchanged. Comma-separated authors are rewritten as
    "A, B and C"; a missing author becomes "Anonymous".

    Raises RuntimeError if PyYAML is not installed.
    """
    if yaml is None:
        raise RuntimeError("PyYAML is required to read the tutorial header")

    match = re.match(r'(\A---.*?)---', text, re.DOTALL)
    if match is None:
        header, body = {}, text
    else:
        # safe_load: the header is plain data, never construct Python objects
        header = yaml.safe_load(match.group(1)) or {}
        body = text[match.end():]

    if title != "Untitled tutorial":
        header["title"] = title

    if subtitle != "":
        header["subtitle"] = subtitle

    if "beastversion" not in header:
        header["beastversion"] = version

    author = header.get("author")
    if author:
        names = author.split(",") if isinstance(author, str) else list(author)
        names = [str(name).strip() for name in names if str(name).strip()]
        if len(names) > 1:
            header["author"] = ", ".join(names[:-1]) + " and " + names[-1]
        elif names:
            header["author"] = names[0]
    else:
        header["author"] = "Anonymous"

    return (header, body)
#


def parseLiquid(text, header=None):
    """Replace the liquid tags {% tag content %} in text with Latex.

    Supported tags are eq, eqinline, cite and bibliography; any other tag is
    reported on stdout and removed. If header is given, the file name passed
    to "bibliography --file" is stored in header["bibtex"].
    """
    def _convert(match):
        (tag, content) = match.groups()

        if tag == "eq":
            return "\\begin{equation}\n\t%s\n\\end{equation}" % content.strip()
        if tag == "eqinline":
            return "`$ %s $`" % content.strip()
        if tag == "cite":
            # Everything after the first "--" is an option, not part of the key
            return "\\citep{%s}" % content.split("--", 1)[0].strip()
        if tag == "bibliography":
            parts = content.split()
            bibfile = None
            for (flag, value) in zip(parts, parts[1:]):
                if flag == "--file":
                    bibfile = value
            if bibfile is not None and header is not None:
                # Only the file name is kept, not its directory
                header["bibtex"] = bibfile.rsplit("/", 1)[-1]
            return ""

        sys.stdout.write("WARNING Unsupported tag: %s\n" % match.group(0))
        sys.stdout.write("skipping...\n")
        return ""

    return _LIQUID_TAG.sub(_convert, text)
#


def _parseImageAttributes(attributes, caption=""):
    """Return (figfile, scale) for the attribute string of an <img> tag.

    figfile is None when there is no src attribute. scale is the option string
    for \\includegraphics; caption is only used in the warning message.
    """
    figfile = None
    scale = _DEFAULT_SCALE

    for (name, value) in _IMAGE_ATTRIBUTE.findall(attributes):
        name = name.lower()
        if name == "src":
            figfile = value
        elif name == "style":
            width = _WIDTH_PERCENT.match(value)
            if width is not None:
                scale = "[width=%.6f\\textwidth]" % (float(width.group(1)) / 100)
            else:
                sys.stdout.write("WARNING Unsupported image style specification %s in '%.20s...'\n"
                                 % ('style="%s"' % value, caption))
                sys.stdout.write("skipping...\n")

    return (figfile, scale)
#


def _buildFigure(label=None, image=None, caption=None):
    """Return a Latex figure environment; parts that are None are left out."""
    lines = ["\\begin{figure}", "\t\\centering"]

    # Process \includegraphics
    if image is not None:
        (figfile, scale) = _parseImageAttributes(image, caption or "")
        if figfile is None:
            sys.stdout.write("WARNING Image without src in '%.20s...'\n" % (caption or ""))
            sys.stdout.write("skipping...\n")
        else:
            lines.append("\t\\includegraphics%s{%s}" % (scale, figfile))

    # Process \caption
    # TODO: Should actually run the caption through pandoc to parse the caption body text
    if caption is not None:
        captionstr = caption.replace("%", "\\%")
        if _CAPTION_PREFIX.match(captionstr):
            # Latex numbers figures itself, drop the "Figure N:" prefix
            captionstr = captionstr[captionstr.find(':') + 1:]
        lines.append("\t\\caption{%s}" % captionstr.strip())

    # Process label
    if label is not None:
        lines.append("\t\\label{%s}" % label.strip())

    lines.append("\\end{figure}")
    return "\n".join(lines) + "\n"
#


def parseFigures(text):
    """Convert html figures that contain a label, an image and a caption.

    Only figures with all three parts (in that order) are converted; see
    parseFiguresStepwise for a version that tolerates missing parts.
    """
    return _FIGURE_FULL.sub(lambda match: _buildFigure(*match.groups()), text)
#


# Assumes each figure contains only one image
def parseFiguresStepwise(text):
    """Convert every html <figure> block to a Latex figure environment.

    The label, image and caption are looked up independently inside each
    block, so a figure that lacks one of them is still converted.
    """
    def _convert(match):
        body = match.group(1)
        label = _FIGURE_LABEL.search(body)
        image = _FIGURE_IMAGE.search(body)
        caption = _FIGURE_CAPTION.search(body)
        return _buildFigure(label.group(1) if label else None,
                            image.group(1) if image else None,
                            caption.group(1) if caption else None)

    return _FIGURE_BLOCK.sub(_convert, text)
#


def parseFigureRefs(text):
    """Replace [Figure N](#label) links with "Figure \\ref{label}"."""
    return _FIGURE_REF.sub(lambda match: "Figure \\ref{%s}" % match.group(1).strip(), text)
#


def removeMdLines(text):
    """Replace Markdown horizontal lines (three or more dashes) with \\clearpage."""
    # A function is used so that the backslash is not read as a regex escape
    return _HORIZONTAL_LINE.sub(lambda match: "\n\\clearpage\n", text)
#


def formatInlineMath(text):
    """Turn \\lstinline!$math$! (as produced by pandoc) back into $ math $."""
    def _convert(match):
        return "$ %s $" % match.group(1).replace("\\\\", "\\").strip()

    return _INLINE_MATH.sub(_convert, text)
#


def formatSuperscript(text):
    """Convert text superscripts \\^{}(x) and \\^{}x to $^{x}$.

    A superscript is only converted when at least one character follows it.
    """
    for pattern in _SUPERSCRIPTS:
        text = pattern.sub(lambda match: "$^{%s}$" % match.group(1), text)

    return text
#


def removeRefSection(text):
    """Remove the "Relevant References" section heading (case-insensitive)."""
    return _REFERENCES_HEADING.sub("", text)
#


def _buildOptionParser():
    """Return the command line option parser of the script."""
    usage = "usage: %prog [option]"
    parser = OptionParser(usage=usage)

    parser.add_option("-i", "--inputfile",
                      dest="inputfile",
                      default="",
                      metavar="file",
                      help="Input file [required]")

    parser.add_option("-o", "--outputfile",
                      dest="outputfile",
                      default="",
                      metavar="file",
                      help="Name of the output file (by default it is the same as the input) [required]")

    parser.add_option("-t", "--template",
                      dest="template",
                      default="",
                      metavar="file",
                      help="Pandoc template file to use (if not specified use pandoc default) [required]")

    parser.add_option("-T", "--title",
                      dest="title",
                      default="Untitled tutorial",
                      metavar="string",
                      help="Title of tutorial in quotes (if not already in yaml header) [required]")

    parser.add_option("-S", "--subtitle",
                      dest="subtitle",
                      default="",
                      metavar="string",
                      help="Subtitle of tutorial in quotes (if not already in yaml header) [required]")

    parser.add_option("-V", "--version",
                      dest="version",
                      default="2.x",
                      metavar="string",
                      help="Version of BEAST the tutorial is written for (if not already in yaml header) [required]")

    parser.add_option("-r", "--removefiles",
                      action="store_true",
                      default=False,
                      dest="remove",
                      metavar="boolean",
                      help="Remove temporary files (default=%default) [required]")

    parser.add_option("-L", "--latex",
                      action="store_true",
                      default=False,
                      dest="latex",
                      metavar="boolean",
                      help="Compile output file with pdflatex (default=%default) [required]")

    return parser
#


def _removeIfExists(paths):
    """Delete the given files, ignoring those that were never created."""
    for path in paths:
        if os.path.exists(path):
            os.remove(path)
#


def _compileLatex(outputfile, usebibtex, remove):
    """Run pdflatex (and bibtex if usebibtex) on outputfile in its own directory."""
    (outputpath, texname) = os.path.split(outputfile)
    filename = os.path.splitext(texname)[0]

    here = os.getcwd()
    os.chdir(outputpath)
    try:
        call(['pdflatex', texname])
        if usebibtex:
            call(['bibtex', filename])
            call(['pdflatex', texname])
        call(['pdflatex', texname])

        # Remove temporary files
        if remove:
            leftovers = [".aux", ".log", ".out", ".run.xml"]
            if usebibtex:
                leftovers += ["-blx.bib", ".bbl", ".blg"]
            _removeIfExists(filename + suffix for suffix in leftovers)
    finally:
        # Always return to the directory the script was started from
        os.chdir(here)
#


def main(argv=None):
    """Convert the tutorial given on the command line to Latex.

    argv -- argument list without the program name (default: sys.argv[1:])
    """
    parser = _buildOptionParser()
    (options, args) = parser.parse_args(argv)

    if options.inputfile == "":
        parser.error("no input file given (use -i)")
    if yaml is None:
        sys.exit("ERROR PyYAML is required to run this script")

    inputfile = os.path.abspath(options.inputfile)
    (basename, extension) = os.path.splitext(inputfile)
    outputfile = os.path.abspath(options.outputfile) if options.outputfile != "" else basename + ".tex"
    template = os.path.abspath(options.template) if options.template != "" else ""

    tempmd = basename + "-temp" + extension
    temptex = basename + "-temp.tex"

    ##################
    # Pre-processing #
    ##################

    with open(inputfile, 'r', encoding='utf-8') as infile:
        text = infile.read()

    # Read header
    (header, text) = getYamlHeader(text, title=options.title,
                                   subtitle=options.subtitle, version=options.version)

    # Parse liquid tags
    text = parseLiquid(text, header=header)

    # Parse figures in html tags
    text = parseFiguresStepwise(text)

    # Parse figure references
    text = parseFigureRefs(text)

    # Remove horizontal lines
    text = removeMdLines(text)

    # Add header to text and save temporary file
    with open(tempmd, 'w', encoding='utf-8') as outfile:
        outfile.write("---\n")
        yaml.dump(header, outfile, default_flow_style=False, encoding='utf-8')
        outfile.write("---\n")
        outfile.write(text)

    #####################
    # Pandoc conversion #
    #####################

    status = call(getPandocCall(tempmd, temptex, template=template))
    if status != 0:
        sys.exit("ERROR pandoc failed with exit code %d" % status)

    ###################
    # Post-processing #
    ###################

    with open(temptex, 'r', encoding='utf-8') as infile:
        text = infile.read()

    # Framed boxes (quotes)
    text = text.replace("\\begin{quote}", "\\begin{framed}").replace("\\end{quote}", "\\end{framed}")

    # Format inline math (not properly converted by pandoc)
    text = formatInlineMath(text)

    # Format text superscripts (does not work perfectly)
    text = formatSuperscript(text)

    # Remove relevant references heading
    text = removeRefSection(text)

    # Remove unknown latex tags that pandoc inserts
    # remove \toprule, \bottomrule, \tightlist
    text = text.replace("\\toprule", "").replace("\\bottomrule", "").replace("\\tightlist", "")

    with open(outputfile, 'w', encoding='utf-8') as outfile:
        outfile.write(signature + text)

    # Remove temporary files
    if options.remove:
        _removeIfExists([temptex, tempmd])

    #################
    # Compile latex #
    #################

    if options.latex:
        _compileLatex(outputfile, 'bibtex' in header, options.remove)
#


if __name__ == "__main__":
    main()