forked from selfboot/html2Dash
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhtml2dash.py
executable file
·209 lines (177 loc) · 6.11 KB
/
html2dash.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import os
import re
import sqlite3
import subprocess
from bs4 import BeautifulSoup
def update_db(name, path):
    """Insert one ``Section`` entry into ``searchIndex`` unless it is a duplicate.

    Works on the module-level cursor ``cur``. The entry is skipped when a row
    with the same ``path`` or the same ``name`` is already present, so every
    link text and every target shows up at most once in the index.

    Args:
        name: Text of the entry as it is stored in the ``name`` column.
        path: Link target stored in the ``path`` column.

    Database errors are reported and the entry is skipped: indexing a single
    link is best-effort and must not abort the whole run. Any other error
    (for example a missing cursor) is a programming error and propagates.
    """
    try:
        # One lookup covers both duplicate conditions (same path OR same name).
        cur.execute(
            "SELECT 1 FROM searchIndex WHERE path = ? OR name = ? LIMIT 1",
            (path, name),
        )
        if cur.fetchone() is not None:
            return
        cur.execute(
            "INSERT OR IGNORE INTO searchIndex(name, type, path) VALUES (?,?,?)",
            (name, "Section", path),
        )
    except sqlite3.Error as err:
        print("**Warning**: could not index %r -> %r: %s" % (name, path, err))
def add_urls():
    """Index every link found on the docset's ``index.html``.

    Reads ``index.html`` from the module-level ``docset_path``, walks all
    ``<a>`` tags that carry an ``href`` attribute and passes each non-empty
    link text together with its target to ``update_db``. Links that point
    back into the index page itself are skipped.
    """
    index_file = os.path.join(docset_path, 'index.html')
    with open(index_file, encoding='utf-8') as handle:
        soup = BeautifulSoup(handle.read(), "html.parser")

    # Targets that are the index page itself: 'index.html#x' and bare '#x'.
    # This has to be a real tuple; testing against the plain string
    # 'index.html' is a substring test and would also drop pages such as
    # 'x.html' or 'index.htm'.
    own_pages = ('', 'index.html')

    for tag in soup.find_all('a', href=True):
        name = tag.text.strip()
        if not name:
            continue
        path = tag['href'].strip()
        if path.split('#')[0] not in own_pages:
            update_db(name, path)
def add_infoplist(info_path, index_page):
    """Write the docset's property list to *info_path*.

    The bundle name is the module-level ``docset_name`` without its
    ``.docset`` suffix; it is used for ``CFBundleIdentifier``,
    ``CFBundleName`` and ``DashDocSetFamily``.

    Args:
        info_path: File to create (overwritten if it exists).
        index_page: Value written as ``dashIndexFilePath``.

    Raises:
        SystemExit: With code 2 when the file cannot be written.
    """
    from xml.sax.saxutils import escape

    # Strip only the trailing ".docset"; splitting on the first dot would
    # truncate names such as "python3.8.docset" to "python3".
    suffix = ".docset"
    if docset_name.endswith(suffix):
        name = docset_name[:-len(suffix)]
    else:
        name = docset_name
    print(name)
    print(index_page)

    # The XML declaration must be the very first bytes of the file, hence the
    # line continuation right after the opening quotes.
    # NOTE(review): DocSetPlatformFamily is hard-coded to "requests" - confirm
    # whether it should follow the docset name instead.
    template = """\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleIdentifier</key>
<string>{0}</string>
<key>CFBundleName</key>
<string>{1}</string>
<key>DashDocSetFamily</key>
<string>{2}</string>
<key>DocSetPlatformFamily</key>
<string>requests</string>
<key>isDashDocset</key>
<true/>
<key>isJavaScriptEnabled</key>
<true/>
<key>dashIndexFilePath</key>
<string>{3}</string>
</dict>
</plist>
"""
    # Escape &, < and > so arbitrary names and paths keep the XML well-formed.
    safe_name = escape(name)
    info = template.format(safe_name, safe_name, safe_name, escape(index_page))

    print(info_path)
    print(info)
    print("Create the Info.plist File")
    try:
        with open(info_path, 'wb') as plist_file:
            plist_file.write(info.encode("utf-8"))
    except OSError:
        print("**Error**: Create the Info.plist File Failed...")
        raise SystemExit(2)
def clear_trash():
    """Delete the generated docset folder after a failed run.

    The folder is ``docset_name`` below ``destpath`` (both module-level names
    set by the script body before this function is ever called), i.e. the
    place the docset was actually created in, not a same-named folder in the
    current working directory.

    Failures are reported but never raised, because this already runs on an
    error path that is about to exit.
    """
    import shutil

    target = os.path.join(destpath, docset_name)
    try:
        shutil.rmtree(target)
    except OSError:
        print("**Error**: Clear trash failed...")
    else:
        print("Clear generated useless files!")
if __name__ == "__main__":
    # Script entry point: build <name>.docset from a directory of HTML files.
    # Steps: parse arguments, create the folder layout, copy the documents,
    # build the SQLite search index, write Info.plist, optionally add an icon.
    # docset_name, destpath, docset_path and cur stay module-level names
    # because the helper functions above read them as globals.
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--name',
                        help='Name the docset explicitly')
    parser.add_argument('-d', '--destination',
                        dest='path',
                        default='',
                        help='Put the resulting docset into PATH')
    parser.add_argument('-i', '--icon',
                        dest='filename',
                        help='Add PNG icon FILENAME to docset')
    parser.add_argument('-p', '--index-page',
                        help='Set the file that is shown')
    parser.add_argument('SOURCE',
                        help='Directory containing the HTML documents')
    results = parser.parse_args()

    # Drop trailing slashes so basename() yields the directory name; a bare
    # "/" (or an empty argument) is kept as given instead of being indexed.
    source_dir = results.SOURCE.rstrip("/") or results.SOURCE
    if not os.path.isdir(source_dir):
        print(source_dir + " does not exsit!")
        raise SystemExit(2)

    dir_name = os.path.basename(source_dir)
    docset_name = (results.name or dir_name) + ".docset"

    # Docset layout, relative to the destination directory.
    doc_path = docset_name + "/Contents/Resources/Documents"
    dsidx_path = docset_name + "/Contents/Resources/docSet.dsidx"
    icon = docset_name + "/icon.png"
    # Capital "I": the file is looked up by this exact name, which matters on
    # case-sensitive file systems.
    info = docset_name + "/Contents/Info.plist"

    destpath = results.path
    if destpath and not destpath.endswith("/"):
        destpath += "/"
    docset_path = destpath + doc_path
    sqlite_path = destpath + dsidx_path
    info_path = destpath + info
    icon_path = destpath + icon

    if not os.path.exists(docset_path):
        os.makedirs(docset_path)
        print("Create the Docset Folder!")
    else:
        print("Docset Folder already exist!")

    # Copy the HTML documentation into the docset folder. The entries are
    # listed here because no shell is involved, so a "*" argument would reach
    # cp unexpanded. check_call raises when cp exits with an error.
    print(source_dir)
    print(docset_path)
    try:
        sources = [os.path.join(source_dir, entry)
                   for entry in os.listdir(source_dir)]
        if sources:
            arg_list = ["cp", "-r"] + sources + [docset_path]
            print('%s\n' % arg_list)
            subprocess.check_call(arg_list)
        print("Copy the HTML Documentation!")
    except (OSError, subprocess.CalledProcessError):
        print("**Error**: Copy Html Documents Failed...")
        clear_trash()
        raise SystemExit(2)

    # Create and connect to the SQLite index.
    try:
        db = sqlite3.connect(sqlite_path)
        cur = db.cursor()
    except sqlite3.Error:
        print("**Error**: Create SQLite Index Failed...")
        clear_trash()
        raise SystemExit(2)

    try:
        # Start from an empty table when the docset is regenerated.
        cur.execute('DROP TABLE IF EXISTS searchIndex;')
        cur.execute('CREATE TABLE searchIndex(id INTEGER PRIMARY KEY, '
                    'name TEXT, type TEXT, path TEXT);')
        cur.execute('CREATE UNIQUE INDEX anchor ON searchIndex (name, type, path);')
        print("Create the SQLite Index")
        add_urls()
        db.commit()
    finally:
        # Close the connection even when indexing fails.
        db.close()

    # Create the Info.plist file.
    index_page = results.index_page or "index.html"
    add_infoplist(info_path, index_page)

    # Add the icon file if one was given.
    icon_filename = results.filename
    if icon_filename:
        if not (icon_filename.endswith(".png") and os.path.isfile(icon_filename)):
            print("**Error**: Icon file should be a valid PNG image...")
            clear_trash()
            raise SystemExit(2)
        try:
            subprocess.check_call(["cp", icon_filename, icon_path])
        except (OSError, subprocess.CalledProcessError):
            print("**Error**: Copy Icon file failed...")
            clear_trash()
            raise SystemExit(2)
        print("Create the Icon for the Docset!")

    print("Generate Docset Successfully!")