Skip to content

Commit

Permalink
assign KEN geo ids
Browse files Browse the repository at this point in the history
  • Loading branch information
pgonzalez01 committed Mar 19, 2024
1 parent 28725cc commit 2832892
Show file tree
Hide file tree
Showing 8 changed files with 72,093 additions and 0 deletions.
72,020 changes: 72,020 additions & 0 deletions files_for_db/geo/ken_geo.csv

Large diffs are not rendered by default.

Binary file added files_for_db/shps/ken.zip
Binary file not shown.
1 change: 1 addition & 0 deletions files_for_db/shps/ken/ken.cpg
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ISO-8859-1
Binary file added files_for_db/shps/ken/ken.dbf
Binary file not shown.
1 change: 1 addition & 0 deletions files_for_db/shps/ken/ken.prj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
Binary file added files_for_db/shps/ken/ken.shp
Binary file not shown.
Binary file added files_for_db/shps/ken/ken.shx
Binary file not shown.
71 changes: 71 additions & 0 deletions scripts/geo/ken_geo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import geopandas as gpd
import pandas as pd
import shutil
import os
from utils import *

# Fetch and read the dataset
# os.path.abspath normalises "<this file>/../../..", i.e. the directory two
# levels above the one that contains this script; raw inputs live under
# data/KEN/ inside it.
DATA_PATH = str(os.path.abspath(os.path.join(__file__, "../../.."))) + "/data/KEN/"
ken_schools = pd.read_csv(os.path.join(DATA_PATH, "Kenya MOE Schools - Schools.csv"))

# Subset the data and rename columns to the names used by the rest of the script.
# A non-inplace rename returns a fresh frame, so the column assignments below
# never operate on a slice of the raw CSV frame.
ken_schools = ken_schools[["Name", "Longitude", "Latitude"]].rename(
    columns={"Name": "school_name", "Longitude": "longitude", "Latitude": "latitude"}
)

n_schools = len(ken_schools)

# Generate GEO ID's
# One zero-padded ID per row, numbered from 0 ("KEN-000000", "KEN-000001", ...).
# The list has exactly n_schools entries, so nothing depends on index alignment
# silently discarding a surplus element.
ken_schools["geo_id"] = [f"KEN-{row_no:0>6}" for row_no in range(n_schools)]
# Add address variable
# No address information is taken from the source file; the column is left empty.
ken_schools["address"] = None

# Generate dependency ID's
# "<source file stem>.<row number>", numbered from 1 (unlike geo_id, which starts at 0).
ken_schools["deped_id"] = [
    f"Kenya MOE Schools - Schools.{row_no}" for row_no in range(1, n_schools + 1)
]

# Add ADM level variables
# `cols` is the ordered list of attribute columns kept after every spatial join;
# it grows by one "adm<N>" entry per administrative level processed below.
cols = ["geo_id", "deped_id", "school_name", "address", "adm0"]

# Add ADM0
ken_schools["adm0"] = "KEN"

# Add ADM1, ADM2, ADM3
# For each level: fetch the boundary shapefile via the project helpers
# (downloadGB / getGBpath), spatially join every school point to the polygon
# that contains it, and store that polygon's "shapeName" as the adm<N> column.
for adm in range(1, 4):
    adm_col = "adm" + str(adm)
    cols.append(adm_col)
    try:
        downloadGB("KEN", str(adm), "../../gb")
        shp = gpd.read_file(getGBpath("KEN", f"ADM{adm}", "../../gb"))

        # Build the point layer in a separate variable so that a failure in
        # this iteration leaves `ken_schools` exactly as it was.
        # The coordinates are tagged as EPSG:4326, the same CRS the script
        # assigns to these points when writing the shapefile further down.
        school_points = gpd.GeoDataFrame(
            ken_schools,
            geometry=gpd.points_from_xy(
                ken_schools.longitude, ken_schools.latitude, crs="EPSG:4326"
            ),
        )

        joined = gpd.sjoin(school_points, shp, how="left").rename(
            columns={"shapeName": adm_col}
        )
        # A left join returns one row per (point, matching polygon) pair, so a
        # point lying on a shared boundary can appear more than once. Keep the
        # first match so there is still exactly one row per school.
        joined = joined[~joined.index.duplicated(keep="first")]

        # Carry longitude/latitude through the join (placed after the ADM
        # columns) instead of dropping them and re-attaching saved arrays,
        # which breaks as soon as the row count changes.
        ken_schools = joined[cols + ["longitude", "latitude"]]
        print(ken_schools.head())

    except Exception as e:
        # Best effort: if this level cannot be downloaded, read or joined,
        # leave its column empty and continue with the next level.
        ken_schools[adm_col] = None
        print(e)


# os.path.abspath normalises "<this file>/../../..", i.e. the directory two
# levels above the one that contains this script; outputs go under
# files_for_db/ inside it.
PATH = str(os.path.abspath(os.path.join(__file__, "../../..")))

# Add KEN csv file to files_for_db
ken_schools.to_csv(PATH + "/files_for_db/geo/ken_geo.csv", index=False)

# Build the point layer from the longitude/latitude columns for the shapefile.
gdf = gpd.GeoDataFrame(
    ken_schools,
    geometry=gpd.points_from_xy(
        x=ken_schools.longitude,
        y=ken_schools.latitude,
        crs="EPSG:4326",  # or: crs = pyproj.CRS.from_user_input(4326)
    ),
)

# makedirs(exist_ok=True) also creates a missing parent "shps" directory and
# does not fail if the target already exists, unlike an exists-check + mkdir.
shp_dir = PATH + "/files_for_db/shps/ken"
os.makedirs(shp_dir, exist_ok=True)

# Add shape file to files_for_db
gdf.to_file(shp_dir + "/ken.shp", index=False)

# Add zip file to files_for_db
# Produces "<shp_dir>.zip" next to the directory, containing the directory's files.
shutil.make_archive(shp_dir, "zip", shp_dir)

0 comments on commit 2832892

Please sign in to comment.