-
Notifications
You must be signed in to change notification settings - Fork 18
/
Rakefile
195 lines (171 loc) · 4.7 KB
/
Rakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# Including only the changed build task
require 'rake'
require 'jekyll'
require 'nokogiri'
require 'open-uri'
require 'json'
require 'optparse'
require 'jemoji'
require 'rake'
require 'pigments'
require 'htmlentities'
require 'parallel'
require_relative '_scripts/spell_check.rb'
require 'etc'
require 'yaml'
require 'fileutils'
require 'nokogiri'
require 'json'
require 'openssl'
# True only when the TRAVIS environment variable is exactly the string 'true'.
is_travis = ENV['TRAVIS'] == 'true'

# Default output path for the man-page index written by pre_build:gen_man.
main_json_file = '_data/man.json'

# Default folder processed by pre_build:gen_coursebook.
coursebook_dir = '_coursebook'

# Git URL of the coursebook wiki (not referenced by the code visible here).
coursebook_url = 'https://github.com/illinois-cs241/coursebook.wiki.git'

# Directory the site is built into, and the name of the search index inside it.
DEST_DIR = './_site'
SEARCH_FILE = 'search_data.json'

# Jekyll configuration shared by the build and serve tasks.
$config = Jekyll.configuration(
  {
    source: './',
    destination: DEST_DIR,
    timezone: 'America/Chicago',
    safe: false,
    host: '0.0.0.0',
  }
)
# Writes the search index for the built site.
#
# Every document of every collection in +site+ contributes one entry with its
# title (from the document's data), its cleaned-up text content and its URL.
# The entries are serialised as a JSON array to DEST_DIR/SEARCH_FILE.
#
# @param site [#collections] object whose collections yield (key, collection)
#   pairs; each collection responds to #docs, and each doc to #content, #data
#   and #url
# @return [Integer] whatever File.write returns (number of bytes written)
def gen_search_json(site)
  entries = site.collections.flat_map do |_key, collection|
    collection.docs.map do |document|
      {
        title: document.data['title'],
        content: search_text_from_html(document.content),
        url: document.url,
      }
    end
  end
  File.write(File.join(DEST_DIR, SEARCH_FILE), entries.to_json)
end

# Private helper: reduces an HTML fragment to a single line of searchable text.
#
# Links and <pre> blocks are dropped, a space is inserted after every period,
# a few stop words are removed, and whitespace is normalised.
#
# @param html [String] HTML markup of one document
# @return [String] cleaned text without newlines or repeated spaces
def search_text_from_html(html)
  doc = Nokogiri::HTML(html)
  doc.search('.//a').remove
  doc.search('.//pre').remove

  text = doc.xpath('//text()').to_s.gsub('.', '. ')

  # Applied one after another, in this order, because each pass consumes the
  # whitespace around the word it removes.
  [/\s[Tt]he\s/, /\s[Bb]e\s/, /\s[Tt]o\s/, /\s[Aa]nd\s/].each do |stop_word|
    text = text.gsub(stop_word, ' ')
  end

  # Newlines become spaces instead of being deleted: deleting them glued
  # together words that were separated only by a line break.
  text.tr("\n", ' ').squeeze(' ')
end
# Default task: after both pre_build tasks have run, build the Jekyll site,
# write the search index next to it and copy CNAME into the output folder.
multitask default: %w[pre_build:gen_man pre_build:gen_coursebook] do
  jekyll_site = Jekyll::Site.new($config)
  Jekyll::Commands::Build.build(jekyll_site, $config)
  gen_search_json(jekyll_site)
  cp('./CNAME', './_site/CNAME')
end
# Serve task: runs the default build first, then hands the shared
# configuration to Jekyll's serve command.
multitask serve: %w[default] do
  # The Site object is constructed but not used afterwards in this block;
  # the construction is kept so the task behaves exactly as before.
  _site = Jekyll::Site.new($config)
  Jekyll::Commands::Serve.process($config)
end
# Tasks that prepare generated data before the Jekyll build runs.
namespace :pre_build do
# NOTE(review): two `desc` calls precede the first task below; presumably only
# one of them ends up attached to it - confirm against Rake's `desc` behaviour.
desc 'Houses all pre build tasks'
# Numeric suffixes used to build the dir_section_<n>.html index page names.
sections = [1, 2, 3, 4]
desc 'https://linux.die.net/man/ throws 403; see their robots.txt for more info'
# Prefix for both the index pages that are fetched and the generated links.
base_url = 'https://man7.org/linux/man-pages/'
# Maximum age of the generated man-page file before it is refreshed.
cache_time = 30 # days
# Rebuilds the JSON file that maps man-page names to their URLs.
#
# Task argument :file is the output path; when omitted, main_json_file is
# used. An existing file younger than cache_time days is left untouched.
# Otherwise every dir_section_<n>.html index page under base_url is fetched
# and each usable link is recorded as { name => absolute URL }.
task :gen_man, [:file] do |_t, args|
  file = args[:file]
  if file.nil?
    file = main_json_file
    puts "Using default file #{file}"
  end

  # Man pages don't change that often, so a recent file is reused.
  # The age is computed with Time on both sides: File.mtime returns a Time,
  # and in plain Ruby (without ActiveSupport's core extensions) comparing a
  # Time with a DateTime via <=> yields nil, so the previous check never
  # reported the file as fresh.
  max_age_seconds = cache_time * 24 * 60 * 60
  if File.exist?(file) && (Time.now - File.mtime(file)) < max_age_seconds
    puts 'Using cached file'
    next
  end

  puts 'Updating file'
  output = {}
  sections.each do |section|
    url = "#{base_url}dir_section_#{section}.html"
    # NOTE(review): VERIFY_NONE turns off TLS certificate verification for
    # this download; kept as-is, but confirm it is still required.
    page = Nokogiri::HTML(
      URI.open(
        url,
        ssl_verify_mode: OpenSSL::SSL::VERIFY_NONE,
        'User-Agent' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_0) AppleWebKit/600.1.17 (KHTML, like Gecko) Version/8.0 Safari/600.1.17'
      )
    )
    page.css('a').each do |link|
      href = link['href']
      func_name = link.inner_html[/^(\w+)/, 1]
      # Anchors without a target, or whose text does not start with a word
      # character, cannot be turned into an entry; skip them instead of
      # raising on nil.
      next if href.nil? || func_name.nil?

      output[func_name] = base_url + href
    end
  end

  # Mode 'w' creates the file if needed and truncates any previous content.
  File.open(file, 'w', 0o644) { |f| f.write(JSON.generate(output)) }
  puts "Successfully wrote #{file}"
end
# Derives a human-readable title from a file path.
#
# Takes the last path component, strips a trailing '.md' extension and turns
# every dash into a space.
#
# @param text [String] file name or path
# @return [String] the derived title
def title_from_html(text)
  File.basename(text, '.md').tr('-', ' ')
end
# Rewrites +file+ in place, replacing every match of each pattern with the
# replacement it is keyed by.
#
# The file is rewritten from scratch, so content that becomes shorter does not
# leave stale bytes from the old version at the end of the file.
#
# @param file [String] path of the file to rewrite
# @param pattern_map [Hash{String => Regexp, String}] replacement text mapped
#   to the pattern it replaces; entries are applied in iteration order
# @return [nil]
def link_patterns(file, pattern_map)
  rewritten = pattern_map.reduce(File.read(file)) do |text, (link, pattern)|
    text.gsub(pattern, link)
  end
  File.write(file, rewritten)
  nil
end
# Inserts +string+ at the very beginning of +file+.
#
# The existing content is read first and then written back after +string+.
# Block-form file access is used so handles are closed even when an error is
# raised, and no temporary file is involved.
#
# @param file [String] path of the file to modify
# @param string [String] text to put in front of the existing content
# @return [nil]
def prepend(file, string)
  existing = File.read(file)
  File.open(file, 'w') do |out|
    out.write(string)
    out.write(existing)
  end
  nil
end
# Prepares the coursebook markdown files for Jekyll.
#
# Task argument :folder is the checkout to process; when omitted,
# coursebook_dir is used. The checkout is first reset to a clean HEAD, then
# every markdown file gets YAML front matter (layout, title, toc) prepended,
# and finally Home.md is renamed to index.md inside the same folder.
task :gen_coursebook, [:folder] do |_t, args|
  folder = args[:folder]
  if folder.nil?
    folder = coursebook_dir
    puts "Using default Folder #{folder}"
  end

  # Argument-list form with chdir: avoids passing the folder name through a
  # shell; the reset only runs when the clean succeeded, as before.
  system('git', 'clean', '-fq', chdir: folder) &&
    system('git', 'reset', '--hard', 'HEAD', chdir: folder)

  Dir.glob(File.join(folder, '*.md')).each do |file|
    # A line followed by a row of three or more '=' is taken as the page
    # title; otherwise the file name without extension is used.
    heading = File.read(file).match(/(.*)\n={3,}/)
    page_title = heading ? heading.captures[0] : File.basename(file, '.md')

    meta = {
      'layout' => 'doc',
      'title' => page_title,
      'toc' => false,
    }
    prepend(file, "#{meta.to_yaml}\n---\n\n")
  end

  # Rename inside the folder that was actually processed, not always the
  # default one.
  FileUtils.mv(File.join(folder, 'Home.md'), File.join(folder, 'index.md'))
end
end
# Spell-checks the markdown files in the project root and in _docs, using one
# thread per processor reported by Etc.nprocessors.
task :spell_check do
  markdown_paths = Dir.glob('*.md') + Dir.glob('_docs/*.md')
  open_dictionaries do |dictionaries|
    Parallel.map(markdown_paths, in_threads: Etc.nprocessors) do |path|
      check_spelling(path, dictionaries)
    end
  end
end