From 5dc6b05d31f7472c74cbb0be08266d77d96d2929 Mon Sep 17 00:00:00 2001 From: Rahul Nath Date: Fri, 18 Oct 2019 14:57:53 -0400 Subject: [PATCH 1/2] Add script files to generate ocd-ids to github repo. --- scripts/country-in/create_ocd_ids.py | 114 ++++++++ scripts/country-in/csv/br_constituencies.csv | 242 ++++++++++++++++ scripts/country-in/csv/br_districts.csv | 39 +++ scripts/country-in/csv/dl_constituencies.csv | 71 +++++ scripts/country-in/csv/dl_districts.csv | 12 + scripts/country-in/csv/hr_constituencies.csv | 91 ++++++ scripts/country-in/csv/hr_districts.csv | 23 ++ scripts/country-in/csv/jh_constituencies.csv | 82 ++++++ scripts/country-in/csv/jh_districts.csv | 25 ++ scripts/country-in/csv/mh_constituencies.csv | 287 +++++++++++++++++++ scripts/country-in/csv/mh_districts.csv | 37 +++ scripts/country-in/fetch_constituencies.py | 61 ++++ 12 files changed, 1084 insertions(+) create mode 100644 scripts/country-in/create_ocd_ids.py create mode 100644 scripts/country-in/csv/br_constituencies.csv create mode 100644 scripts/country-in/csv/br_districts.csv create mode 100644 scripts/country-in/csv/dl_constituencies.csv create mode 100644 scripts/country-in/csv/dl_districts.csv create mode 100644 scripts/country-in/csv/hr_constituencies.csv create mode 100644 scripts/country-in/csv/hr_districts.csv create mode 100644 scripts/country-in/csv/jh_constituencies.csv create mode 100644 scripts/country-in/csv/jh_districts.csv create mode 100644 scripts/country-in/csv/mh_constituencies.csv create mode 100644 scripts/country-in/csv/mh_districts.csv create mode 100644 scripts/country-in/fetch_constituencies.py diff --git a/scripts/country-in/create_ocd_ids.py b/scripts/country-in/create_ocd_ids.py new file mode 100644 index 00000000..2afdacdf --- /dev/null +++ b/scripts/country-in/create_ocd_ids.py @@ -0,0 +1,114 @@ +from __future__ import print_function +import csv +""" +Program to generate OCD IDs for the 2019 Indian Vidhan Sabha elections. + +State OCD-IDs created for Bihar, Delhi, Jharkhand, Maharashstra, and Haryana. + +Information on Districts and Constituencies for each state are read from a +CSV file called `{state_abbreviation}_constituencies.csv`. A left-join is executed +on the table read from this CSV and another (required) CSV called +`{state_abbrev}_districts.csv`, on the "district" column, creating a new table +that adds an abbreviation column to the original constituencies table. + +The {state_abbreviation}_constituencies.csv files can be created using the +`fetch_constituencies.py` script available in this directory. + +This script writes OCD-IDs into a CSV with the election name, in this case +vidhan_sabha.csv. It also prints out the parents OCD-IDs that need to be added +to the `federal_states_territories.csv` file before `compile.py` is run. +""" + +state_abbrevs = {"hr": "Haryana", "mh": "Maharashtra", "br": "Bihar", "dl": "Delhi", "jh": "Jharkhand"} +columns = ["id", "name"] +country = "in" +election_name = "Vidhan Sabha" +write_table = [] +new_file = True + +punc_replacements = { + " ": "_", + "(": "", + ")": "", + "-": "_" +} + +def read_csv(csv_file): + # returns in-memory list of table rows + table = [] + with open("csv/" + csv_file, "rb") as f: + csv_reader = csv.DictReader(f) + for row in csv_reader: + table.append(row) + return table + +def join_table(consts, districts, state, state_abbr): + # return joined table + new_table = [] + ht = {} + for d_row in districts: + district_key = d_row["district"] + ht[district_key] = d_row["abbreviation"].lower() + for c_row in consts: + # source of truth on district names: + # https://affidavit.eci.gov.in/ + cons_district = c_row["district"].strip() + + if cons_district in ht: + c_row["district"] = cons_district + c_row["district_abbr"] = ht[cons_district][:2] + else: + raise Exception("district {} doesn't have abbrev".format(cons_district)) + + constituency = c_row["constituency"] + for old, new in punc_replacements.items(): + constituency = constituency.replace(old, new) + c_row["constituency"] = constituency.lower() + c_row["state"] = state + c_row["state_abbr"] = state_abbr + c_row["district"] = cons_district + new_table.append(c_row) + return new_table + +def write_to_file(table): + # format hardcoded OCD ID + global new_file + ocd_id = "ocd-division/country:{}/{}:{}/district:{}/cd:{}" + rest = "{} constituency; {} district; {}" + write_header = None + # used to create top level OCD IDs if they don't exist + parent_set = set() + if new_file: + open_type = "w+" + new_file = False + write_header = True + else: + open_type = "a+" + with open("{}.csv".format(election_name.lower().replace(" ", "_")), open_type) as f: + writer = csv.DictWriter(f, fieldnames=columns) + if write_header: + writer.writeheader() + write_header = False + for row in table: + if row["state_abbr"] == "dl": + state_name = "territory" + else: + state_name = "state" + full_const = " ".join(word.capitalize() for word in row["constituency"].split("_")) + ocd_id_row = { + "id": ocd_id.format(country, state_name, row["state_abbr"], + row["district_abbr"], row["constituency"]), + "name": rest.format(full_const, row["district"], row["state"]) + } + parent = ocd_id_row["id"].rsplit("/", 1) + parent_set.add("/".join(parent[:-1]) + "," + row["district"]) + writer.writerow(ocd_id_row) + for parent in sorted(parent_set, key=lambda x: x.split(",")[-1]): + print(parent) + +for state_abbr, state in sorted(state_abbrevs.items()): + consts = read_csv("{}_constituencies.csv".format(state_abbr)) + districts = read_csv("{}_districts.csv".format(state_abbr)) + write_table.append(join_table(consts, districts, state, state_abbr)) +for t in write_table: + write_to_file(t) diff --git a/scripts/country-in/csv/br_constituencies.csv b/scripts/country-in/csv/br_constituencies.csv new file mode 100644 index 00000000..ede0c54c --- /dev/null +++ b/scripts/country-in/csv/br_constituencies.csv @@ -0,0 +1,242 @@ +constituency,district +Araria,Araria +Forbesganj,Araria +Jokihat,Araria +Narpatganj,Araria +Raniganj,Araria +Sikti,Araria +Arwal,Arwal +Kurtha,Arwal +Aurangabad,Aurangabad +Goh,Aurangabad +Kutumba,Aurangabad +Nabinagar,Aurangabad +Obra,Aurangabad +Rafiganj,Aurangabad +Amarpur,Banka +Banka,Banka +Belhar,Banka +Dhauraiya,Banka +Katoria,Banka +Bachhwara,Begusarai +Bakhri,Begusarai +Begusarai,Begusarai +Cheria Bariarpur,Begusarai +Matihani,Begusarai +Sahebpur Kamal,Begusarai +Teghra,Begusarai +Bhagalpur,Bhagalpur +Bihpur,Bhagalpur +Gopalpur,Bhagalpur +Kahalgaon,Bhagalpur +Nathnagar,Bhagalpur +Pirpainti,Bhagalpur +Sultanganj,Bhagalpur +Agiaon,Bhojpur +Arrah,Bhojpur +Barhara,Bhojpur +Jagdishpur,Bhojpur +Sandesh,Bhojpur +Shahpur,Bhojpur +Tarari,Bhojpur +Brahampur,Buxar +Buxar,Buxar +Dumraon,Buxar +Rajpur,Buxar +Alinagar,Darbhanga +Bahadurpur,Darbhanga +Benipur,Darbhanga +Darbhanga,Darbhanga +Darbhanga Rural,Darbhanga +Gaura Bauram,Darbhanga +Hayaghat,Darbhanga +Jale,Darbhanga +Keoti,Darbhanga +Kusheshwar Asthan,Darbhanga +Atri,Gaya +Barachatti,Gaya +Belaganj,Gaya +Bodh Gaya,Gaya +Gaya Town,Gaya +Gurua,Gaya +Imamganj,Gaya +Sherghati,Gaya +Tikari,Gaya +Wazirganj,Gaya +Baikunthpur,Gopalganj +Barauli,Gopalganj +Bhorey,Gopalganj +Gopalganj,Gopalganj +Hathua,Gopalganj +Kuchaikote,Gopalganj +Ghosi,Jahanabad +Jehanabad,Jahanabad +Makhdumpur,Jahanabad +Chakai,Jamui +Jamui,Jamui +Jhajha,Jamui +Sikandra,Jamui +Bhabua,Kaimur +Chainpur,Kaimur +Mohania,Kaimur +Ramgarh,Kaimur +Balrampur,Katihar +Barari,Katihar +Kadwa,Katihar +Katihar,Katihar +Korha,Katihar +Manihari,Katihar +Pranpur,Katihar +Alauli,Khagaria +Beldaur,Khagaria +Khagaria,Khagaria +Parbatta,Khagaria +Bahadurganj,Kishanganj +Kishanganj,Kishanganj +Kochadhaman,Kishanganj +Thakurganj,Kishanganj +Lakhisarai,Lakhisarai +Surajgarha,Lakhisarai +Alamnagar,Madhepura +Bihariganj,Madhepura +Madhepura,Madhepura +Singheshwar,Madhepura +Babubarhi,Madhubani +Benipatti,Madhubani +Bisfi,Madhubani +Harlakhi,Madhubani +Jhanjharpur,Madhubani +Khajauli,Madhubani +Laukaha,Madhubani +Madhubani,Madhubani +Phulparas,Madhubani +Rajnagar,Madhubani +Jamalpur,Munger +Munger,Munger +Tarapur,Munger +Aurai,Muzaffarpur +Baruraj,Muzaffarpur +Bochaha,Muzaffarpur +Gaighat,Muzaffarpur +Kanti,Muzaffarpur +Kurhani,Muzaffarpur +Minapur,Muzaffarpur +Muzaffarpur,Muzaffarpur +Paroo,Muzaffarpur +Sahebganj,Muzaffarpur +Sakra,Muzaffarpur +Asthawan,Nalanda +Biharsharif,Nalanda +Harnaut,Nalanda +Hilsa,Nalanda +Islampur,Nalanda +Nalanda,Nalanda +Rajgir,Nalanda +Gobindpur,Nawada +Hisua,Nawada +Nawada,Nawada +Rajauli,Nawada +Warsaliganj,Nawada +Bagaha,Paschim Champaran +Bettiah,Paschim Champaran +Chanpatia,Paschim Champaran +Lauriya,Paschim Champaran +Narkatiaganj,Paschim Champaran +Nautan,Paschim Champaran +Ramnagar,Paschim Champaran +Sikta,Paschim Champaran +Valmikinagar,Paschim Champaran +Bakhtiarpur,Patna +Bankipur,Patna +Barh,Patna +Bikram,Patna +Danapur,Patna +Digha,Patna +Fatuha,Patna +Kumhrar,Patna +Maner,Patna +Masaurhi,Patna +Mokama,Patna +Paliganj,Patna +Patna Sahib,Patna +Phulwari,Patna +Amour,Purnia +Baisi,Purnia +Banmankhi,Purnia +Dhamdaha,Purnia +Kasba,Purnia +Purnia,Purnia +Rupauli,Purnia +Chiraiya,Purvi Champaran +Dhaka,Purvi Champaran +Govindganj,Purvi Champaran +Harsidhi,Purvi Champaran +Kalyanpur,Samastipur +Kesaria,Purvi Champaran +Madhuban,Purvi Champaran +Motihari,Purvi Champaran +Narkatia,Purvi Champaran +Pipra,Supaul +Raxaul,Purvi Champaran +Sugauli,Purvi Champaran +Chenari,Rohtas +Dehri,Rohtas +Dinara,Rohtas +Karakat,Rohtas +Kargahar,Rohtas +Nokha,Rohtas +Sasaram,Rohtas +Mahishi,Saharsa +Saharsa,Saharsa +Simri Bakhtiarpur,Saharsa +Sonbarsha,Saharsa +Bibhutipur,Samastipur +Hasanpur,Samastipur +Mohiuddinnagar,Samastipur +Morwa,Samastipur +Rosera,Samastipur +Samastipur,Samastipur +Sarairanjan,Samastipur +Ujiarpur,Samastipur +Warisnagar,Samastipur +Amnour,Saran +Baniapur,Saran +Chapra,Saran +Ekma,Saran +Garkha,Saran +Manjhi,Saran +Marhaura,Saran +Parsa,Saran +Sonepur,Saran +Taraiya,Saran +Barbigha,Sheikhpura +Sheikhpura,Sheikhpura +Sheohar,Sheohar +Bajpatti,Sitamarhi +Bathnaha,Sitamarhi +Belsand,Sitamarhi +Parihar,Sitamarhi +Riga,Sitamarhi +Runnisaidpur,Sitamarhi +Sitamarhi,Sitamarhi +Sursand,Sitamarhi +Barharia,Siwan +Darauli,Siwan +Daraundha,Siwan +Goriakothi,Siwan +Maharajganj,Siwan +Raghunathpur,Siwan +Siwan,Siwan +Ziradei,Siwan +Chhatapur,Supaul +Nirmali,Supaul +Supaul,Supaul +Triveniganj,Supaul +Hajipur,Vaishali +Lalganj,Vaishali +Mahnar,Vaishali +Mahua,Vaishali +Patepur,Vaishali +Raghopur,Vaishali +Rajapakar,Vaishali +Vaishali,Vaishali diff --git a/scripts/country-in/csv/br_districts.csv b/scripts/country-in/csv/br_districts.csv new file mode 100644 index 00000000..28890237 --- /dev/null +++ b/scripts/country-in/csv/br_districts.csv @@ -0,0 +1,39 @@ +abbreviation,district,headquarters +AR,Araria,Araria +AW,Arwal,Arwal +AU,Aurangabad,Aurangabad +BA,Banka,Banka +BE,Begusarai,Begusarai +BG,Bhagalpur,Bhagalpur +BJ,Bhojpur,Arrah +BU,Buxar,Buxar +DA,Darbhanga,Darbhanga +EC,Purvi Champaran,Motihari +GA,Gaya,Gaya +GO,Gopalganj,Gopalganj +JA,Jamui,Jamui +JE,Jahanabad,Jahanabad +KM,Kaimur,Bhabua +KT,Katihar,Katihar +KH,Khagaria,Khagaria +KI,Kishanganj,Kishanganj +LA,Lakhisarai,Lakhisarai +MP,Madhepura,Madhepura +MB,Madhubani,Madhubani +MG,Munger,Munger +MZ,Muzaffarpur,Muzaffarpur +NL,Nalanda,Bihar Sharif +NW,Nawada,Nawada +PA,Patna,Patna +PU,Purnia,Purnia +RO,Rohtas,Sasaram +SH,Saharsa,Saharsa +SM,Samastipur,Samastipur +SR,Saran,Chhapra +SP,Sheikhpura,Sheikhpura +SO,Sheohar,Sheohar +ST,Sitamarhi,Sitamarhi +SW,Siwan,Siwan +SU,Supaul,Supaul +VA,Vaishali,Hajipur +WC,Paschim Champaran,Bettiah diff --git a/scripts/country-in/csv/dl_constituencies.csv b/scripts/country-in/csv/dl_constituencies.csv new file mode 100644 index 00000000..d8233559 --- /dev/null +++ b/scripts/country-in/csv/dl_constituencies.csv @@ -0,0 +1,71 @@ +constituency,district +Ballimaran,Central Delhi +Burari,Central Delhi +Chandni Chowk,Central Delhi +Karol Bagh,Central Delhi +Matia Mahal,Central Delhi +Sadar Bazar,Central Delhi +Timarpur,Central Delhi +Gandhi Nagar,East Delhi +Kondli,East Delhi +Krishna Nagar,East Delhi +Laxmi Nagar,East Delhi +Patparganj,East Delhi +Trilokpuri,East Delhi +Delhi Cantt,New Delhi +Greater Kailash,New Delhi +New Delhi,New Delhi +Patel Nagar,New Delhi +R K Puram,New Delhi +Rajinder Nagar,New Delhi +Adarsh Nagar,North Delhi +Badli,North Delhi +Bawana,North Delhi +Model Town,North Delhi +Nerela,North Delhi +Rohini,North Delhi +Shakur Basti,North Delhi +Wazirpur,North Delhi +Ghonda,North East Delhi +Gokalpur,North East Delhi +Karawal Nagar,North East Delhi +Mustafabad,North East Delhi +Seelampur,North East Delhi +Kirari,North West Delhi +Mangol Puri,North West Delhi +Mundka,North West Delhi +Rithala,North West Delhi +Shalimar Bagh,North West Delhi +Sultanpur Majra,North West Delhi +Tri Nagar,North West Delhi +Babarpur,Shahdara +Rohtas Nagar,Shahdara +Seema Puri,Shahdara +Shahdara,Shahdara +Vishwas Nagar,Shahdara +Ambedkar Nagar,South Delhi +Chhatarpur,South Delhi +Deoli,South Delhi +Malviya Nagar,South Delhi +Mehrauli,South Delhi +Bijwasan,South West Delhi +Dwarka,South West Delhi +Matiala,South West Delhi +Najafgarh,South West Delhi +Palam,South West Delhi +Uttam Nagar,South West Delhi +Vikaspuri,South West Delhi +Badarpur,South East Delhi +Jangpura,South East Delhi +Kalkaji,South East Delhi +Kasturba Nagar,South East Delhi +Okhla,South East Delhi +Sangam Vihar,South East Delhi +Tughlakabad,South East Delhi +Hari Nagar,West Delhi +Janakpuri,West Delhi +Madipur,West Delhi +Moti Nagar,West Delhi +Nangloi Jat,West Delhi +Rajouri Garden,West Delhi +Tilak Nagar,West Delhi diff --git a/scripts/country-in/csv/dl_districts.csv b/scripts/country-in/csv/dl_districts.csv new file mode 100644 index 00000000..c6f44bba --- /dev/null +++ b/scripts/country-in/csv/dl_districts.csv @@ -0,0 +1,12 @@ +abbreviation,district,headquarters +CD,Central Delhi,Daryaganj +ED,East Delhi,Preet Vihar +ND,New Delhi,Connaught Place +NO,North Delhi,Sadar Bazaar +NE,North East Delhi,Seelampur +NW,North West Delhi,Kanjhawala +SH,Shahdara,Shahdara +SD,South Delhi,Saket +SE,South East Delhi,Defence Colony +SW,South West Delhi,Vasant Vihar +WD,West Delhi,Rajouri Garden \ No newline at end of file diff --git a/scripts/country-in/csv/hr_constituencies.csv b/scripts/country-in/csv/hr_constituencies.csv new file mode 100644 index 00000000..feb13bc1 --- /dev/null +++ b/scripts/country-in/csv/hr_constituencies.csv @@ -0,0 +1,91 @@ +constituency,district +Badkhal,Faridabad +Ballabhgarh,Faridabad +Faridabad,Faridabad +Faridabad Nit,Faridabad +Prithla,Faridabad +Tigaon,Faridabad +Ambala Cantt.,Ambala +Ambala City,Ambala +Mulana,Ambala +Naraingarh,Ambala +Bawani Khera,Bhiwani +Bhiwani,Bhiwani +Loharu,Bhiwani +Tosham,Bhiwani +Badhra,Charkhi Dadri +Dadri,Charkhi Dadri +Fatehabad,Fatehabad +Ratia,Fatehabad +Tohana,Fatehabad +Badshahpur,Gurgaon +Gurgaon,Gurgaon +Pataudi,Gurgaon +Sohna,Gurgaon +Adampur,Hisar +Barwala,Hisar +Hansi,Hisar +Hisar,Hisar +Nalwa,Hisar +Narnaund,Hisar +Uklana,Hisar +Badli,Jhajjar +Bahadurgarh,Jhajjar +Beri,Jhajjar +Jhajjar,Jhajjar +Jind,Jind +Julana,Jind +Narwana,Jind +Safidon,Jind +Uchana Kalan,Jind +Guhla,Kaithal +Kaithal,Kaithal +Kalayat,Kaithal +Pundri,Kaithal +Assandh,Karnal +Gharaunda,Karnal +Indri,Karnal +Karnal,Karnal +Nilokheri,Karnal +Ladwa,Kurukshetra +Pehowa,Kurukshetra +Shahbad,Kurukshetra +Thanesar,Kurukshetra +Ateli,Mahendragarh +Mahendragarh,Mahendragarh +Nangal Chaudhry,Mahendragarh +Narnaul,Mahendragarh +Ferozepur Jhirka,Nuh +Nuh,Nuh +Punahana,Nuh +Hathin,Palwal +Hodal,Palwal +Palwal,Palwal +Kalka,Panchkula +Panchkula,Panchkula +Israna,Panipat +Panipat City,Panipat +Panipat Rural,Panipat +Samalkha,Panipat +Bawal,Rewari +Kosli,Rewari +Rewari,Rewari +Garhi Samplakiloi,Rohtak +Kalanaur,Rohtak +Meham,Rohtak +Rohtak,Rohtak +Dabwali,Sirsa +Ellenabad,Sirsa +Kalawali,Sirsa +Rania,Sirsa +Sirsa,Sirsa +Baroda,Sonipat +Ganaur,Sonipat +Gohana,Sonipat +Kharkhauda,Sonipat +Rai,Sonipat +Sonipat,Sonipat +Jagadhri,Yamunanagar +Radaur,Yamunanagar +Sadhaura,Yamunanagar +Yamunanagar,Yamunanagar diff --git a/scripts/country-in/csv/hr_districts.csv b/scripts/country-in/csv/hr_districts.csv new file mode 100644 index 00000000..a4ccea7c --- /dev/null +++ b/scripts/country-in/csv/hr_districts.csv @@ -0,0 +1,23 @@ +district,abbreviation +Ambala,AM +Bhiwani,BH +Charkhi Dadri,CD +Faridabad,FR +Fatehabad,FT +Gurgaon,GU +Hisar,HI +Jhajjar,JH +Jind,JI +Kaithal,KT +Karnal,KR +Kurukshetra,KU +Mahendragarh,MH +Nuh,NH +Palwal,PL +Panchkula,PK +Panipat,PP +Rewari,RE +Rohtak,RO +Sirsa,SI +Sonipat,SO +Yamunanagar,YN diff --git a/scripts/country-in/csv/jh_constituencies.csv b/scripts/country-in/csv/jh_constituencies.csv new file mode 100644 index 00000000..bfddca35 --- /dev/null +++ b/scripts/country-in/csv/jh_constituencies.csv @@ -0,0 +1,82 @@ +constituency,district +Bermo,Bokaro +Bokaro,Bokaro +Chandankyari,Bokaro +Gomia,Bokaro +Chatra,Chatra +Simaria,Chatra +Deoghar,Deoghar +Madhupur,Deoghar +Sarath,Deoghar +Baghmara,Dhanbad +Dhanbad,Dhanbad +Jharia,Dhanbad +Nirsa,Dhanbad +Sindri,Dhanbad +Tundi,Dhanbad +Dumka,Dumka +Jama,Dumka +Jarmundi,Dumka +Shikaripara,Dumka +Baharagora,East Singhbhum +Ghatshila,East Singhbhum +Jamshedpur East,East Singhbhum +Jamshedpur West,East Singhbhum +Jugsalai,East Singhbhum +Potka,East Singhbhum +Bhawanathpur,Garhwa +Garhwa,Garhwa +Bagodar,Giridih +Dhanwar,Giridih +Dumri,Giridih +Gandey,Giridih +Giridih,Giridih +Jamua,Giridih +Godda,Godda +Mahagama,Godda +Poreyahat,Godda +Bishunpur,Gumla +Gumla,Gumla +Sisai,Gumla +Barhi,Hazaribagh +Barkattha,Hazaribagh +Hazaribagh,Hazaribagh +Mandhu,Hazaribagh +Jamtara,Jamtara +Nala,Jamtara +Khunti,Khunti +Torpa,Khunti +Kodarma,Kodarma +Latehar,Latehar +Manika,Latehar +Lohardaga,Lohardaga +Littipara,Pakur +Maheshpur,Pakur +Pakur,Pakur +Bishrampur,Palamu +Chattarpur,Palamu +Daltonganj,Palamu +Hussainabad,Palamu +Panki,Palamu +Barkagaon,Ramgarh +Ramgarh,Ramgarh +Hatia,Ranchi +Kanke,Ranchi +Khijri,Ranchi +Mandar,Ranchi +Ranchi,Ranchi +Silli,Ranchi +Tamar,Ranchi +Barhait,Sahebganj +Borio,Sahebganj +Rajmahal,Sahebganj +Ichagarh,Saraikela Kharswan +Kharasawan,Saraikela Kharswan +Saraikella,Saraikela Kharswan +Kolebira,Simdega +Simdega,Simdega +Chaibasa,West Singhbhum +Chakradharpur,West Singhbhum +Jaganathpur,West Singhbhum +Majhganon,West Singhbhum +Manoharpur,West Singhbhum diff --git a/scripts/country-in/csv/jh_districts.csv b/scripts/country-in/csv/jh_districts.csv new file mode 100644 index 00000000..cb08f447 --- /dev/null +++ b/scripts/country-in/csv/jh_districts.csv @@ -0,0 +1,25 @@ +abbreviation,district,headquarters +BO,Bokaro,Bokaro +CH,Chatra,Chatra +DE,Deoghar,Deoghar +DH,Dhanbad,Dhanbad +DU,Dumka,Dumka +ES,East Singhbhum,Jamshedpur +GA,Garhwa,Garhwa +GI,Giridih,Giridih +GO,Godda,Godda +GU,Gumla,Gumla +HA,Hazaribagh,Hazaribag +JA,Jamtara,Jamtara +KH,Khunti,Khunti +KO,Kodarma,Koderma +LA,Latehar,Latehar +LO,Lohardaga,Lohardaga +PK,Pakur,Pakur +PL,Palamu,Daltonganj +RM,Ramgarh,Ramgarh +RA,Ranchi,Ranchi +SA,Sahebganj,Sahebganj +SK,Saraikela Kharswan,Seraikela +SI,Simdega,Simdega +WS,West Singhbhum,Chaibasa \ No newline at end of file diff --git a/scripts/country-in/csv/mh_constituencies.csv b/scripts/country-in/csv/mh_constituencies.csv new file mode 100644 index 00000000..c3b0200e --- /dev/null +++ b/scripts/country-in/csv/mh_constituencies.csv @@ -0,0 +1,287 @@ +constituency,district +Ahmednagar City,Ahmednagar +Akole,Ahmednagar +Karjat Jamkhed,Ahmednagar +Kopargaon,Ahmednagar +Nevasa,Ahmednagar +Parner,Ahmednagar +Rahuri,Ahmednagar +Sangmner,Ahmednagar +Shevgaon,Ahmednagar +Shirdi,Ahmednagar +Shrigonda,Ahmednagar +Shrirampur,Ahmednagar +Aakola West,Akola +Akola East,Akola +Akot,Akola +Balapur,Akola +Murtizapur,Akola +Achalpur,Amaravati +Amrawati,Amaravati +Badnera,Amaravati +Daryapur,Amaravati +Dhamangaon Railway,Amaravati +Melghat,Amaravati +Morshi,Amaravati +Teosa,Amaravati +Aurangabad,Aurangabad +Aurangbad,Aurangabad +Gangapur,Aurangabad +Kannad,Aurangabad +Paithan,Aurangabad +Pholambari,Aurangabad +Sillod,Aurangabad +Vaijapur,Aurangabad +Ashti,Beed +Beed,Beed +Georai,Beed +Kaij,Beed +Majalgaon,Beed +Parli,Beed +Bhandara,Bhandara +Sakoli,Bhandara +Tumsar,Bhandara +Buldhana,Buldhana +Chikhli,Buldhana +Jalgaon,Buldhana +Khamgaon,Buldhana +Malkapur,Buldhana +Mehkar,Buldhana +Sindhkhed Raja,Buldhana +Ballarpur,Chandrapur +Bramhapuri,Chandrapur +Chandrapur,Chandrapur +Chimur,Chandrapur +Rajura,Chandrapur +Warora,Chandrapur +Dhule City,Dhule +Dhule Rural,Dhule +Sakri,Dhule +Shirpur,Dhule +Sindhkheda,Dhule +Aheri,Gadchiroli +Armori,Gadchiroli +Gadchiroli,Gadchiroli +Amgaon,Gondiya +Arjuni Morgaon (sc),Gondiya +Gondia,Gondiya +Tirora,Gondiya +Basmath,Hingoli +Hingoli,Hingoli +Kalamnuri,Hingoli +Amalner,Jalgaon +Bhusawal,Jalgaon +Chalisgaon,Jalgaon +Chopda,Jalgaon +Erandol,Jalgaon +Jalgaon City,Jalgaon +Jalgaon Rural,Jalgaon +Jamner,Jalgaon +Muktainagar,Jalgaon +Pachora,Jalgaon +Raver,Jalgaon +Badnapur,Jalna +Bhokardan,Jalna +Gansavangi,Jalna +Jalna,Jalna +Partur,Jalna +Chandgad,Kolhapur +Hatkanangle,Kolhapur +Ichalkaranji,Kolhapur +Kagal,Kolhapur +Karvir,Kolhapur +Kolhapur North,Kolhapur +Kolhapur South,Kolhapur +Radhanagari,Kolhapur +Shahuwadi,Kolhapur +Shirol,Kolhapur +Ahmedpur,Latur +Ausa,Latur +Latur City,Latur +Latur Rural,Latur +Nilanga,Latur +Udgir,Latur +Byculla,Mumbai City +Colaba,Mumbai City +Dharavi,Mumbai City +Mahim,Mumbai City +Malabar Hill,Mumbai City +Mumbadevi,Mumbai City +Shivadi,Mumbai City +Sion Koliwada,Mumbai City +Wadala,Mumbai City +Worli,Mumbai City +Andheri West,Mumbai Suburban +Andheri East,Mumbai Suburban +Anushakti Nagar,Mumbai Suburban +Bhandup West,Mumbai Suburban +Borivali,Mumbai Suburban +Chandivali,Mumbai Suburban +Charkop,Mumbai Suburban +Chembur,Mumbai Suburban +Dhaisar,Mumbai Suburban +Dindoshi,Mumbai Suburban +Ghatkopar West,Mumbai Suburban +Ghatkopar East,Mumbai Suburban +Goregaon,Mumbai Suburban +Jogeshwari East,Mumbai Suburban +Kalina,Mumbai Suburban +Kandivali East,Mumbai Suburban +Kurla,Mumbai Suburban +Magathane,Mumbai Suburban +Malad West,Mumbai Suburban +Mankhurd Shivajinagar,Mumbai Suburban +Mulund,Mumbai Suburban +Vandre West,Mumbai Suburban +Vandre East,Mumbai Suburban +Varsova,Mumbai Suburban +Vikhroli,Mumbai Suburban +Vile Parle,Mumbai Suburban +Hingna,Nagpur +Kamthi,Nagpur +Katol,Nagpur +Nagpur Central,Nagpur +Nagpur East,Nagpur +Nagpur North (sc),Nagpur +Nagpur South,Nagpur +Nagpur South West,Nagpur +Nagpur West,Nagpur +Ramtek,Nagpur +Savner,Nagpur +Umred,Nagpur +Bhokar,Nanded +Deglur,Nanded +Hadgaon,Nanded +Kinwat,Nanded +Loha,Nanded +Mukhed,Nanded +Naigaon,Nanded +Nanded North,Nanded +Nanded South,Nanded +Akkalkuwa,Nandurabar +Nandurbar,Nandurabar +Nawapur,Nandurabar +Shahada,Nandurabar +Baglan,Nashik +Chandwad,Nashik +Deolali,Nashik +Dindori,Nashik +Igatpuri,Nashik +Kalwan,Nashik +Malegaon,Nashik +Nandgaon,Nashik +Nashik West,Nashik +Nashik,Nashik +Nashik East,Nashik +Niphad,Nashik +Sinnar,Nashik +Yevla,Nashik +Omerga,Osmanabad +Osmanabad,Osmanabad +Paranda,Osmanabad +Tuljapur,Osmanabad +Boisar,Palghar +Dahanu,Palghar +Nalasopara,Palghar +Palghar,Palghar +Vasai,Palghar +Vekramgrth,Palghar +Gangakhed,Parbhani +Jintur,Parbhani +Parbhani,Parbhani +Pathri,Parbhani +Ambegaon,Pune +Baramati,Pune +Bhor,Pune +Bhosari,Pune +Chinchwad,Pune +Daund,Pune +Hadapsar,Pune +Indapur,Pune +Junnar,Pune +Kasba Peth,Pune +Khadakwasala,Pune +Khed Alandi,Pune +Kothrud,Pune +Maval,Pune +Parvati,Pune +Pimpri,Pune +Pune Cantonment (sc),Pune +Purandar,Pune +Shirur,Pune +Shivajinagar,Pune +Vadgaon Sheri,Pune +Alibag,Raigad +Karjat,Raigad +Mahad,Raigad +Panvel,Raigad +Pen,Raigad +Shrivardhan,Raigad +Uran,Raigad +Chiplun,Ratnagiri +Dapoli,Ratnagiri +Guhagar,Ratnagiri +Rajapur,Ratnagiri +Ratnagiri,Ratnagiri +Islampur,Sangli +Jat,Sangli +Khanpur,Sangli +Miraj,Sangli +Paluskadegaon,Sangli +Sangli,Sangli +Shirala,Sangli +Tasgaonkavathe Mahankal,Sangli +Karad North,Satara +Karad South,Satara +Koregaon,Satara +Man,Satara +Patan,Satara +Phaltan,Satara +Satara,Satara +Wai,Satara +Kankavli,Sindhudurg +Kudal,Sindhudurg +Sawantwadi,Sindhudurg +Akkalkot,Solapur +Barshi,Solapur +Karmala,Solapur +Madha,Solapur +Malshiran,Solapur +Mohol,Solapur +Pandharpur,Solapur +Sangola,Solapur +Solapur City Central,Solapur +Solapur City North,Solapur +Solapur South,Solapur +Airoli,Thane +Ambarnath,Thane +Belapur,Thane +Bhiwandi West,Thane +Bhiwandi East,Thane +Bhiwandi Rural (st),Thane +Dombivali,Thane +Kalyan West,Thane +Kalyan East,Thane +Kalyan Rural,Thane +Kopripachpakhadi,Thane +Meera Bhayandar,Thane +Mumbrakalwa,Thane +Murbad,Thane +Ovala Majiwada,Thane +Shahapur,Thane +Thane,Thane +Ulhasnagar,Thane +Arvi,Wardha +Deoli,Wardha +Hinganghat,Wardha +Wardha,Wardha +Karanja,Washim +Risod,Washim +Washim,Washim +Arni,Yavatmal +Digras,Yavatmal +Pusad,Yavatmal +Ralegaon,Yavatmal +Umarkhed,Yavatmal +Wani,Yavatmal +Yavatmal,Yavatmal diff --git a/scripts/country-in/csv/mh_districts.csv b/scripts/country-in/csv/mh_districts.csv new file mode 100644 index 00000000..65bacb97 --- /dev/null +++ b/scripts/country-in/csv/mh_districts.csv @@ -0,0 +1,37 @@ +district,abbreviation +Ahmednagar,AH +Akola,AK +Amaravati,AM +Aurangabad,AU +Beed,BI +Bhandara,BH +Buldhana,BU +Chandrapur,CH +Dhule,DH +Gadchiroli,GA +Gondiya,GO +Hingoli,HI +Jalgaon,JG +Jalna,JN +Kolhapur,KO +Latur,LA +Mumbai City,MC +Mumbai Suburban,MU +Nagpur,NG +Nanded,ND +Nandurabar,NB +Nashik,NS +Osmanabad,OS +Palghar,PL +Parbhani,PA +Pune,PU +Raigad,RG +Ratnagiri,RT +Sangli,SN +Satara,ST +Sindhudurg,SI +Solapur,SO +Thane,TH +Wardha,WR +Washim,WS +Yavatmal,YT diff --git a/scripts/country-in/fetch_constituencies.py b/scripts/country-in/fetch_constituencies.py new file mode 100644 index 00000000..915ae654 --- /dev/null +++ b/scripts/country-in/fetch_constituencies.py @@ -0,0 +1,61 @@ +import urllib.request +import csv +import json + +state_codes = { "br": "S04", "dl": "U05", "jh": "S27", "hr": "S07", "mh": "S13" } +consts_file = "{}_constituencies" +districts_url = "https://electoralsearch.in/Home/GetDistList?st_code={}" +constituency_url = "https://electoralsearch.in/Home/GetAcList?dist_no={}&st_code={}" +replacements = { + "_": " ", + "-": "" +} +columns = ["constituency", "district"] +name_replacements = { + "Kaimur (bhabua)".upper(): "Kaimur", + "saraikela- kharswan".upper(): "Saraikela Kharswan" +} + +def clean_name(name): + name = name.strip() + name_test = name.split(" ") + if len(name_test) > 1 and name_test[1].startswith("("): + name = name_test[0] + if name in name_replacements: + return name_replacements[name] + else: + for old, new in replacements.items(): + name = name.replace(old, new) + return " ".join([n.lower().capitalize() for n in name.split(" ")]) + + +state_consts = {} + +for state, state_code in state_codes.items(): + with urllib.request.urlopen(districts_url.format(state_code)) as response: + data = response.read() + encoding = response.info().get_content_charset("utf-8") + res = json.loads(data.decode(encoding)) + const_districts = {} + for district in res: + + dist_code = district["dist_no"] + init_name = district["dist_name"] + if state == "dl" and "delhi" not in district["dist_name"].lower(): + init_name = init_name.replace(" ", "") + " DELHI" + district_name = clean_name(init_name) + with urllib.request.urlopen(constituency_url.format(dist_code, state_code)) as const_response: + data = const_response.read() + encoding = const_response.info().get_content_charset("utf-8") + const_res = json.loads(data.decode(encoding)) + for const in const_res: + const_name = clean_name(const["ac_name"]) + const_districts[const_name] = district_name + state_consts[state] = const_districts + + +for state, consts in sorted(state_consts.items()): + with open("{}_constituencies.csv".format(state), "w+") as f: + writer = csv.DictWriter(f, fieldnames=columns) + for const, district in consts.items(): + writer.writerow({ "constituency": const, "district": district }) From 79faad0f8d44eda648151deb948496a4f79fc913 Mon Sep 17 00:00:00 2001 From: Rahul Nath Date: Fri, 18 Oct 2019 17:32:10 -0400 Subject: [PATCH 2/2] Add scripts and csv files --- scripts/country-in/create_ocd_ids.py | 60 +++++++++++++++------- scripts/country-in/fetch_constituencies.py | 42 +++++++++++---- 2 files changed, 74 insertions(+), 28 deletions(-) diff --git a/scripts/country-in/create_ocd_ids.py b/scripts/country-in/create_ocd_ids.py index 2afdacdf..c898ea05 100644 --- a/scripts/country-in/create_ocd_ids.py +++ b/scripts/country-in/create_ocd_ids.py @@ -1,8 +1,6 @@ -from __future__ import print_function -import csv """ Program to generate OCD IDs for the 2019 Indian Vidhan Sabha elections. - +Adaptable to generate OCD-IDs for elections with districts and constituencies. State OCD-IDs created for Bihar, Delhi, Jharkhand, Maharashstra, and Haryana. Information on Districts and Constituencies for each state are read from a @@ -17,15 +15,32 @@ This script writes OCD-IDs into a CSV with the election name, in this case vidhan_sabha.csv. It also prints out the parents OCD-IDs that need to be added to the `federal_states_territories.csv` file before `compile.py` is run. + +Source of truth for district and constituency names: https://electoralsearch.in """ +from __future__ import print_function +import csv -state_abbrevs = {"hr": "Haryana", "mh": "Maharashtra", "br": "Bihar", "dl": "Delhi", "jh": "Jharkhand"} -columns = ["id", "name"] + +# For file names and OCD-IDs. +state_abbrevs = {"hr": "Haryana", "mh": "Maharashtra", + "br": "Bihar", "dl": "Delhi", "jh": "Jharkhand"} country = "in" + +# Final output CSV name and its columns. election_name = "Vidhan Sabha" +columns = ["id", "name"] + +# Final table being written to ocd-id CSV write_table = [] new_file = True +# If top level OCD-IDs don't exist +# Prints them to stdout to be added to +# federal_states_territories.csv. +print_top_level_ids = True + +# cleans the data of certain punctuation punc_replacements = { " ": "_", "(": "", @@ -34,24 +49,22 @@ } def read_csv(csv_file): - # returns in-memory list of table rows + """Returns in-memory list of table rows.""" table = [] - with open("csv/" + csv_file, "rb") as f: + with open("csv/" + csv_file, "r", encoding="utf-8") as f: csv_reader = csv.DictReader(f) for row in csv_reader: table.append(row) return table def join_table(consts, districts, state, state_abbr): - # return joined table + """Joins abbrevations with constituency districts.""" new_table = [] - ht = {} + ht = {} # Hashtable that stores the district abbreviation. for d_row in districts: district_key = d_row["district"] ht[district_key] = d_row["abbreviation"].lower() for c_row in consts: - # source of truth on district names: - # https://affidavit.eci.gov.in/ cons_district = c_row["district"].strip() if cons_district in ht: @@ -71,41 +84,50 @@ def join_table(consts, districts, state, state_abbr): return new_table def write_to_file(table): - # format hardcoded OCD ID + """Formats hardcoded OCD ID and writes to CSV file.""" global new_file ocd_id = "ocd-division/country:{}/{}:{}/district:{}/cd:{}" rest = "{} constituency; {} district; {}" write_header = None - # used to create top level OCD IDs if they don't exist + + # Used to create top level OCD IDs if they don't exist. parent_set = set() + + # Write headers if it's the first table being written. if new_file: open_type = "w+" new_file = False write_header = True else: open_type = "a+" - with open("{}.csv".format(election_name.lower().replace(" ", "_")), open_type) as f: + with open("{}.csv".format(election_name.lower().replace(" ", "_")), + open_type, encoding="utf-8") as f: writer = csv.DictWriter(f, fieldnames=columns) if write_header: writer.writeheader() write_header = False for row in table: + # An exception: Delhi is a territory, not a state. if row["state_abbr"] == "dl": state_name = "territory" else: state_name = "state" - full_const = " ".join(word.capitalize() for word in row["constituency"].split("_")) + full_const = " ".join(word.capitalize() + for word in row["constituency"].split("_")) ocd_id_row = { "id": ocd_id.format(country, state_name, row["state_abbr"], row["district_abbr"], row["constituency"]), "name": rest.format(full_const, row["district"], row["state"]) } - parent = ocd_id_row["id"].rsplit("/", 1) - parent_set.add("/".join(parent[:-1]) + "," + row["district"]) + if print_top_level_ids: + parent = ocd_id_row["id"].rsplit("/", 1) + parent_set.add("/".join(parent[:-1]) + "," + row["district"]) writer.writerow(ocd_id_row) - for parent in sorted(parent_set, key=lambda x: x.split(",")[-1]): - print(parent) + if print_top_level_ids: + for parent in sorted(parent_set, key=lambda x: x.split(",")[-1]): + print(parent) +# Driver script for state_abbr, state in sorted(state_abbrevs.items()): consts = read_csv("{}_constituencies.csv".format(state_abbr)) districts = read_csv("{}_districts.csv".format(state_abbr)) diff --git a/scripts/country-in/fetch_constituencies.py b/scripts/country-in/fetch_constituencies.py index 915ae654..5432fecb 100644 --- a/scripts/country-in/fetch_constituencies.py +++ b/scripts/country-in/fetch_constituencies.py @@ -1,21 +1,41 @@ +""" +This script fetches districts and constituencies for Indian states from +the official Indian electoral site for Vidhan Sabha elections. It then +outputs constituencies and districts for each state in a CSV file named +corresponding to each state. From these CSVs, one may run the `create_ocd_id.py` +script to generate OCD-IDs (after separately creating district abbreviation +files for each state). + +The official site is https://electoralsearch.in +""" import urllib.request import csv import json +# Codes gotten from official site. state_codes = { "br": "S04", "dl": "U05", "jh": "S27", "hr": "S07", "mh": "S13" } consts_file = "{}_constituencies" districts_url = "https://electoralsearch.in/Home/GetDistList?st_code={}" constituency_url = "https://electoralsearch.in/Home/GetAcList?dist_no={}&st_code={}" -replacements = { - "_": " ", - "-": "" -} + +# CSV columns columns = ["constituency", "district"] + +# Any specific data-cleaning replacements that should be done +# should be added here. name_replacements = { "Kaimur (bhabua)".upper(): "Kaimur", "saraikela- kharswan".upper(): "Saraikela Kharswan" } +replacements = { + "_": " ", + "-": "" +} + +state_consts = {} + +# Cleans up each a given column cell. def clean_name(name): name = name.strip() name_test = name.split(" ") @@ -28,32 +48,36 @@ def clean_name(name): name = name.replace(old, new) return " ".join([n.lower().capitalize() for n in name.split(" ")]) - -state_consts = {} - +# Driver script for state, state_code in state_codes.items(): + # Get each state's district data with urllib.request.urlopen(districts_url.format(state_code)) as response: data = response.read() encoding = response.info().get_content_charset("utf-8") res = json.loads(data.decode(encoding)) const_districts = {} - for district in res: + # From each district get information to fetch its constituencies. + for district in res: dist_code = district["dist_no"] init_name = district["dist_name"] if state == "dl" and "delhi" not in district["dist_name"].lower(): init_name = init_name.replace(" ", "") + " DELHI" district_name = clean_name(init_name) - with urllib.request.urlopen(constituency_url.format(dist_code, state_code)) as const_response: + with urllib.request.urlopen(constituency_url.format( + dist_code, state_code)) as const_response: data = const_response.read() encoding = const_response.info().get_content_charset("utf-8") const_res = json.loads(data.decode(encoding)) + + # create dictionary of constituencies to their districts. for const in const_res: const_name = clean_name(const["ac_name"]) const_districts[const_name] = district_name state_consts[state] = const_districts +# For each state's constituency, write them to a state specific CSV for state, consts in sorted(state_consts.items()): with open("{}_constituencies.csv".format(state), "w+") as f: writer = csv.DictWriter(f, fieldnames=columns)