forked from openaustralia/openaustralia-parser
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimport_nsw_candidates.rb
342 lines (264 loc) · 8.89 KB
/
import_nsw_candidates.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
#!/usr/bin/env ruby
# == Synopsis
# Load the postcode and NSW candidate data directly into the database
#
# == Examples
# Import postcodes from postcodes_2011nsw.csv and candidates from candidates_2011nsw.csv
#
# ruby ./import_nsw_candidates.rb -p data/postcodes_2011nsw.csv -c data/candidates_2011nsw.csv
#
# Other examples:
#
#
# == Configuration
# Requires lib/configuration_nsw.rb which loads configuration data from:
# ./openaustralia-parser/lib/configuration.rb
#
# == Description
#
# 1. Remove national postcode data
# delete from postcode_lookup
# 2. Remove national member data
# delete from member
#
#
# == Usage
# import_nsw_candidates.rb [options] source_file
#
# For help use: import_nsw_candidates.rb -h
#
# == Options
# -p, --postcodes Postcode CSV file
# -c, --candidates Candidates CSV file
#
#
# == Author
# John Robens
#
# == Copyright
# Copyright (c) 2011 Interlated Pty Ltd. Licensed under the MIT License:
# http://www.opensource.org/licenses/mit-license.php
#
# http://blog.toddwerth.com/entries/show/5
$:.unshift "#{File.dirname(__FILE__)}/lib"
# in `require': no such file to load -- mysql (LoadError)
require 'rubygems'
require 'active_support/core_ext'
require 'csv'
require 'mysql'
require 'configuration_nsw'
require 'people'
require 'optparse'
require 'rdoc/usage'
require 'ostruct'
class NswParser
VERSION = '0.0.1'
attr_reader :options
def initialize(arguments, stdin)
@arguments = arguments
@stdin = stdin
# Set defaults
@options = OpenStruct.new
@options.verbose = false
@options.quiet = false
@options.postcodes = ""
@options.candidates = ""
end
# Parse options, check arguments, then process the command
def run
if parsed_options? && arguments_valid?
output_options if @options.verbose # [Optional]
process_arguments
# run as import_nsw_candidates -p "data/postcodes_2010nsw.csv" -c
postcode_parser = ParsePostcodeNSW.new()
postcode_parser.parse(@options.postcodes)
candidate_parser = ParseCandidateNSW.new()
candidate_parser.parse(@options.candidates)
else
output_usage
end
end
protected
def parsed_options?
# Specify options
opts = OptionParser.new
opts.on('-v', '--version') { output_version ; exit 0 }
opts.on('-h', '--help') { output_help }
opts.on('-V', '--verbose') { @options.verbose = true }
opts.on('-q', '--quiet') { @options.quiet = true }
# Non-generic options
opts.on('-p', '--postcodes STRING', 'Requires a postcodes CSV FILE ') do |postcodes|
@options.postcodes = postcodes
end
opts.on('-c', '--candidates STRING', 'Requires a candidates CSV FILE ') do |candidates|
@options.candidates = candidates
end
opts.parse!(@arguments) rescue return false
process_options
true
end
# Performs post-parse processing on options
def process_options
@options.verbose = false if @options.quiet
end
def output_options
puts "Options:\
"
@options.marshal_dump.each do |name, val|
puts " #{name} = #{val}"
end
end
# True if required arguments were provided
def arguments_valid?
# Should be no remaining arguments
true if @arguments.length == 0
end
# Setup the arguments
def process_arguments
# TO DO - place in local vars, etc
end
def output_help
output_version
RDoc::usage() #exits app
end
def output_usage
RDoc::usage('usage') # gets usage from comments above
end
def output_version
puts "#{File.basename(__FILE__)} version #{VERSION}"
end
def process_command
# TO DO - do whatever this app does
#process_standard_input # [Optional]
end
def process_standard_input
input = @stdin.read
# TO DO - process input
# [Optional]
# @stdin.each do |line|
# # TO DO - process each line
#end
end
end
#end
#
#
# Expects a CSV file of postcode|Electoral District.
#
#
class ParsePostcodeNSW
def initialize()
# Configuration for NSW - make sure that this is a different database to the cwth one.
@conf = ConfigurationNSW.new
end
def parse(file)
# Open the CSV file.
puts "Reading postcode data... #{file}"
divisions = CSV.readlines(file)
db = Mysql.real_connect(@conf.database_host, @conf.database_user, @conf.database_password, @conf.database_name)
# Clear out the old data
puts "clearing postcode_lookup"
db.query("DELETE FROM postcode_lookup")
sql = "INSERT INTO postcode_lookup (postcode, name) VALUES (?, ?)"
st = db.prepare(sql)
divisions.each do |division|
postcode = division[0]
electoral_district = division[1]
# Don't worry about skipping the first one - just check to see whether the postcode is numeric
# Works in AU may not work in other jurisdictions.
if !is_number?(postcode)
puts "Rejected #{postcode} from #{postcode},#{electoral_district} as the postcode should be a number."
elsif !is_not_null?(electoral_district)
puts "Rejected #{electoral_district} from #{postcode},#{electoral_district} as the electoral district should not be null."
else
st.execute(postcode, electoral_district)
end
end
st.close
#values = divisions.map {|row| "('#{row[0]}', '#{quote_string(row[1])}')" }.join(',')
#db.query("INSERT INTO postcode_lookup (postcode, name) VALUES #{values}")
end
def quote_string(s)
s.gsub(/\\/, '\&\&').gsub(/'/, "''") # ' (for ruby-mode)
end
def is_number?(i)
true if Float(i) rescue false
end
def is_not_null?(name)
true unless name.empty? rescue false
end
end
#
#
# Expects a CSV file of postcode|Electoral District.
#
#
class ParseCandidateNSW
def initialize()
# Not actually true, but necessary for them to come out in the postcode lookup
@entered_reason = "candidate"
@left_reason = "still_in_office"
@email_data_key = "email"
# Configuration for NSW - make sure that this is a different database to the cwth one.
@conf = ConfigurationNSW.new
@db = Mysql.real_connect(@conf.database_host, @conf.database_user, @conf.database_password, @conf.database_name)
end
def parse(file)
# Open the CSV file.
puts "Reading candidate data... #{file}"
candidates = CSV.readlines(file)
puts "clearing members"
@db.query("DELETE FROM member")
puts "clearing memberinfo"
@db.query("DELETE FROM memberinfo")
sql = "INSERT INTO member (house, first_name, last_name, constituency, party, entered_house, left_house, entered_reason, left_reason) VALUES (1,?,?,?,?,now(),now(),?, ?)"
st = @db.prepare(sql)
id_sql = "SELECT member_id from member where first_name = ? and last_name = ? and constituency = ? and party = ?"
id_st = @db.prepare(id_sql)
postcode_sql = "SELECT name FROM postcode_lookup where name = ?"
postcode_st = @db.prepare(postcode_sql)
memberinfo_sql = "INSERT INTO memberinfo (member_id,data_key,data_value) VALUES (?,?,?)"
memberinfo_st = @db.prepare(memberinfo_sql)
candidates.each do |candidate|
first_name = candidate[0]
last_name = candidate[1]
electoral_district = candidate[2]
party = candidate[3]
email = candidate[4]
# Reject any lines that don't have electoral districts in the postcode table
if !is_not_null?(email)
puts "Rejected #{email} from #{first_name},#{last_name},#{electoral_district},#{party} as the email address was blank."
elsif is_valid_electoral_district?(postcode_st, electoral_district)
st.execute(first_name, last_name, electoral_district,party,@entered_reason,@left_reason)
# Store the email in the memberinfo table
# Probably could have kept the member_id from the insert (?)
id_st.execute(first_name, last_name, electoral_district, party)
#hopefully only 1
member_id_row = id_st.fetch
update_memberinfo(memberinfo_st, member_id_row[0], email)
else
puts "Rejected #{electoral_district} from #{first_name},#{last_name},#{electoral_district},#{party} as the electoral district was not found in the postcode table."
end
end
st.close
id_st.close
postcode_st.close
memberinfo_st.close
end
def is_not_null?(name)
true unless name.empty? rescue false
end
# Look to see if the district is in the postcode table as a verification.
def is_valid_electoral_district?(st,district)
st.execute(district)
true if st.num_rows() != 0
end
# put the email address into the memberinfo table.
def update_memberinfo(st, member_id, email)
# No email - should roll back the entire transaction. Check prior.
st.execute(member_id, @email_data_key, email)
end
end
# Create and run the application
nsw_parser = NswParser.new(ARGV, STDIN)
nsw_parser.run