forked from standardebooks/tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsplit-file
executable file
·46 lines (34 loc) · 1.47 KB
/
split-file
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/env python3
import argparse
import os
import regex
import se.epub
def main():
    """
    Split an XHTML file into numbered chapter files at every <!--se:split--> marker.

    Reads the file named on the command line and the header template stored at
    templates/header.xhtml next to this script, then calls output() once for
    each chunk of text that ends at a marker, numbering chunks from 1. Text
    left over after the last marker is written as a final chapter unless it is
    empty or whitespace-only.

    A single line may contain any number of markers; each one closes exactly
    one chapter.
    """
    parser = argparse.ArgumentParser(description="Split an XHTML file into many files at all instances of <!--se:split-->, and include a header template for each file.")
    parser.add_argument("filename", metavar="FILE", help="an XHTML file")
    args = parser.parse_args()

    with open(args.filename, "r", encoding="utf-8") as file:
        # NOTE(review): presumably removes a leading byte-order mark; confirm in se.epub.
        xhtml = se.epub.strip_bom(file.read())

    # The header template is located relative to this script, not the working directory.
    template_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "templates", "header.xhtml")
    with open(template_path, "r", encoding="utf-8") as file:
        header_xhtml = file.read()

    split_marker = "<!--se:split-->"
    chapter_number = 1
    chapter_xhtml = ""

    # Remove a split tag at the very start of the document; otherwise the loop
    # below would emit an empty first chapter for it.
    xhtml = regex.sub(r"^\s*<\!--se:split-->", "", xhtml)

    for line in xhtml.splitlines():
        if split_marker not in line:
            chapter_xhtml = chapter_xhtml + "\n" + line
            continue

        # Every piece except the last is terminated by a marker and therefore
        # completes a chapter; the last piece starts the next chapter. Unpacking
        # into exactly two names would raise ValueError on lines with 2+ markers.
        # NOTE: the first piece is appended to the pending text without a
        # newline separator, matching the long-standing output format.
        *finished_pieces, remainder = line.split(split_marker)
        for piece in finished_pieces:
            output(chapter_number, header_xhtml, chapter_xhtml + piece)
            chapter_number += 1
            chapter_xhtml = ""
        chapter_xhtml = remainder

    # Flush whatever follows the final marker, skipping blank leftovers.
    if chapter_xhtml and not chapter_xhtml.isspace():
        output(chapter_number, header_xhtml, chapter_xhtml)
def output(chapter_number, header_xhtml, chapter_xhtml):
    """
    Write one chapter to chapter-<chapter_number>.xhtml, relative to the current directory.

    The file holds the header template with every occurrence of the text
    NUMBER replaced by the chapter number, a newline, the chapter body, and
    then a newline plus the closing </section></body></html> tags. An existing
    file of the same name is overwritten.
    """
    number_text = str(chapter_number)
    target_name = "chapter-" + number_text + ".xhtml"
    document = "".join([
        header_xhtml.replace("NUMBER", number_text),
        "\n",
        chapter_xhtml,
        "\n</section></body></html>",
    ])

    with open(target_name, "w", encoding="utf-8") as chapter_file:
        chapter_file.write(document)
        # Cut the file off at the current write position.
        chapter_file.truncate()
# Run the splitter only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()