From 205881f536eff68ba7baffb720fe33c7e9962cfd Mon Sep 17 00:00:00 2001
From: yashgadhiya10
Date: Thu, 29 Aug 2024 22:36:50 +0530
Subject: [PATCH] Created new Dataset North-Uganda-2017

---
 data/raw.dvc |  6 +++---
 datasets.py  | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/data/raw.dvc b/data/raw.dvc
index bc5cdb43..f13f5629 100644
--- a/data/raw.dvc
+++ b/data/raw.dvc
@@ -1,6 +1,6 @@
 outs:
-- md5: 7ba2a5b698999a9bfa635252b5090ff7.dir
-  size: 446381456
-  nfiles: 406
+- md5: 0916e33f6eef6c80a87e319427005f5e.dir
+  size: 446720790
+  nfiles: 408
   path: raw
   hash: md5
diff --git a/datasets.py b/datasets.py
index 7e690524..920cbc46 100644
--- a/datasets.py
+++ b/datasets.py
@@ -506,6 +506,39 @@ def load_labels(self) -> pd.DataFrame:
         df[START], df[END] = date(2016, 1, 1), date(2017, 12, 31)
         df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3)
         return df
+
+
+class Uganda_NorthCEO2017(LabeledDataset):
+    """North Uganda cropland labels (Feb 2017 - Feb 2018) from two CEO sample sets."""
+
+    def load_labels(self) -> pd.DataFrame:
+        """Read both CEO sample-data CSVs and aggregate the labels per coordinate."""
+        raw_folder = raw_dir / "Uganda_North_2017"
+        label_col = "Does this pixel contain active cropland?"
+        file_template = (
+            "ceo-UNHCR-North-Uganda-Feb-2017---Feb-2018-(Set-{})-sample-data-2024-08-29.csv"
+        )
+        frames = [pd.read_csv(raw_folder / file_template.format(i)) for i in (1, 2)]
+        df = pd.concat(frames)
+
+        # Discard rows with no label
+        df = df[df[label_col].notna()].copy()
+        # 1 for "Crop", 0 for any other answer; averaged per coordinate below
+        df[CLASS_PROB] = (df[label_col] == "Crop").astype(int)
+        # Each row counts as one labeler; summed per coordinate below
+        df["num_labelers"] = 1
+        df = df.groupby([LON, LAT], as_index=False, sort=False).agg(
+            {
+                CLASS_PROB: "mean",
+                "num_labelers": "sum",
+                "plotid": join_unique,
+                "sampleid": join_unique,
+                "email": join_unique,
+            }
+        )
+        df[START], df[END] = date(2017, 1, 1), date(2018, 12, 31)
+        df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3)
+        return df
 
 
 class Uganda_NorthCEO2021(LabeledDataset):
@@ -1538,6 +1571,7 @@ def load_labels(self) -> pd.DataFrame:
     TanzaniaCropArea2019(),
     FranceCropArea2020(),
     Uganda_NorthCEO2016(),
+    Uganda_NorthCEO2017(),
 ]
 
 if __name__ == "__main__":