-
Notifications
You must be signed in to change notification settings - Fork 2
/
docx_brf.rb
executable file
·195 lines (147 loc) · 4.22 KB
/
docx_brf.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
#!/usr/bin/env ruby
##########################################################
###
## File: docx_brf.rb
## Desc: Finds valid bible references in text
## By: Dewayne VanHoozer ([email protected])
#
require 'amazing_print' # Pretty print Ruby objects with proper indentation and colors
require 'pp'
require 'pathname' # STDLIB
require 'pathname_helpers'
require 'bible_gateway' # An unofficial 'API' for BibleGateway.com.
require 'pericope' # Pericope is a gem for parsing Bible references.
require 'docx' # a ruby library/gem for interacting with .docx files
require 'docx_helpers'
include DocxHelpers
# Resolve the real on-disk location of this script. The base name is
# interpolated into the usage text further down.
me      = Pathname(__FILE__).realpath
my_dir  = me.parent
my_name = File.basename(me)

# Script-wide settings; filled in from the command line below.
$options = { verbose: false, docx_paths: [] }
# Tells whether progress messages should be printed.
#
# Returns whatever is stored under :verbose in the global $options hash
# (nil when the key is absent).
def verbose?
  $options[:verbose]
end
# Help text; my_name (this script's file name) is interpolated into it.
usage = <<USAGE
Finds valid bible references in MS Word *.docx files
Usage: #{my_name} [options] docx_files
Where:
options Do This
-h or --help Display this message
-v or --verbose Display progress
docx_files List of *.docx files
USAGE
# Check the command line: handle the help request, pull out the verbose
# flag, then validate every remaining argument as a path to an existing
# *.docx file. Problems are collected in `errors` and reported together.
errors = []

# No arguments at all, or an explicit request for help: show usage and stop.
if ARGV.empty? || ARGV.include?('-h') || ARGV.include?('--help')
  puts usage
  exit
end

# Array#delete returns nil when the flag is absent, so verbose is switched on
# only when a flag was really given. Every occurrence is removed, so a
# repeated flag is never mistaken for a file name.
%w[-v --verbose].each do |flag|
  $options[:verbose] = true if ARGV.delete(flag)
end

if ARGV.empty?
  errors << "No files were specified on the command line"
else
  $options[:docx_paths] = ARGV.map { |arg| Pathname.new(arg) }
  $options[:docx_paths].each do |dp|
    file_name = dp.basename
    if !dp.exist?
      errors << "File does not exist: #{file_name}"
    elsif dp.extname.downcase != '.docx'
      errors << "File is not *.docx: #{file_name}"
    elsif file_name.to_s.downcase.start_with?('backup of')
      # The message must name the file via dp.basename; a bare `basename`
      # is not defined here and would raise NameError.
      errors << "File is a backup: #{file_name}"
    end
  end
end

# Report every problem at once and stop with a failing exit status.
unless errors.empty?
  puts
  puts "Correct the following errors and try again:"
  puts
  errors.each { |message| puts "\t#{message}" }
  puts
  exit(1)
end

# Every path passed the existence check above, so each one can now be
# resolved to its real path.
$options[:docx_paths] = $options[:docx_paths].map(&:realpath)
# Gateway object used for verse lookups in the main loop; the translation is
# selected once, up front.
bible = BibleGateway.new.tap do |gateway|
  gateway.version = :holman_christian_standard_bible
end
######################################################
# Local methods
class String
  # Removes markup tags from the string.
  #
  # Everything from a '<' up to the next '>' is dropped and each '>' is
  # replaced by a single space. A '>' that appears outside a tag is also
  # turned into a space, and text after an unterminated '<' is discarded.
  #
  # Returns the receiver itself when it is empty, otherwise a new String.
  def strip_html
    return self if empty?

    inside_tag = false
    # Append into one mutable buffer rather than building a new string for
    # every character.
    each_char.with_object(''.dup) do |char, text|
      case char
      when '<'
        inside_tag = true
      when '>'
        inside_tag = false
        text << ' '
      else
        text << char unless inside_tag
      end
    end
  end
end # class String
######################################################
# Main
# Reduces verse text to plain ASCII for the tab-separated output.
#
# NOTE: MS Excel can not handle modern character encodings, so the curly
# quotes are mapped to their straight equivalents and every other character
# that cannot be represented is replaced by a space.
#
# The encode options are passed as keywords: since Ruby 3.0 a brace-delimited
# hash in that position is taken as the source encoding and raises TypeError.
def excel_safe(text)
  text
    .tr('“”‘’', %q(""''))
    .encode(Encoding::ASCII_8BIT, invalid: :replace, undef: :replace, replace: ' ')
end

# For every document, print one tab-separated line per bible reference found:
# file name, paragraph style name, reference, verse text.
$options[:docx_paths].each do |dp|
  puts "Reviewing #{dp.basename} ..." if verbose?

  Docx::Document.open(dp).paragraphs.each do |a_paragraph|
    # Iterating an empty result is a no-op, so no separate emptiness check.
    Pericope.parse(a_paragraph.text).each do |reference|
      # The leading fields are written before the lookup, so they still
      # appear if the lookup fails.
      print "#{dp.basename}\t#{get_paragraph_style_name(a_paragraph)}\t#{reference}"

      verse = bible.lookup(reference.to_s)[:content].strip_html.gsub("\n", ' ')
      puts "\t#{excel_safe(verse)}"
    end
  end
end # $options[:docx_paths].each do | dp |
__END__
body_text.each do |a_line|
puts a_line
pc = Pericope.parse(a_line)
pc.each do |r|
puts "*** Found: #{r}"
end unless pc.empty?
end