diff --git a/pylipd/globals/schema.py b/pylipd/globals/schema.py
index a81574d..79025b7 100644
--- a/pylipd/globals/schema.py
+++ b/pylipd/globals/schema.py
@@ -27,7 +27,7 @@
         'fromJson': '_parse_persons'
     },
     'archiveType': {
-        'name': 'proxyArchiveType',
+        'name': 'archiveType', # Changed from proxyArchiveType
         'alternates':[
             'archive',
             'paleoDataArchive',
diff --git a/pylipd/usage.py b/pylipd/usage.py
index 3b00d7d..82ebbd0 100644
--- a/pylipd/usage.py
+++ b/pylipd/usage.py
@@ -70,20 +70,21 @@
 exit()
 '''
 
-# Load from local
 lipd = LiPD()
-data_path = local_lipd_dir + '/Ocn-Palmyra.Nurhati.2011.lpd'
-lipd.load(data_path)
+# data_path = local_lipd_dir + '/Ocn-Palmyra.Nurhati.2011.lpd'
+# lipd.load(data_path)
+
+lipd.load("/Users/varun/Downloads/18.1.Franklin.Bolshoye.2024.lpd")
 print(lipd.get_all_dataset_names())
 
 #lipdfiles = [local_lipd_dir + "/" + dsname + ".lpd" for dsname in dsnames]
 #print(lipdfiles)
 #lipd.load(lipdfiles)
-lipd.load_from_dir("examples/data")
+# lipd.load_from_dir("examples/data")
 
-lipd.load_from_dir("/Users/varun/Downloads/example_sisal_lipds")
+# lipd.load_from_dir("/Users/varun/Downloads/example_sisal_lipds")
 
 print(lipd.get_all_dataset_names())
 
 lat = -77.08
diff --git a/pylipd/utils/lipd_to_rdf.py b/pylipd/utils/lipd_to_rdf.py
index 8f6cb58..3309985 100644
--- a/pylipd/utils/lipd_to_rdf.py
+++ b/pylipd/utils/lipd_to_rdf.py
@@ -66,8 +66,25 @@ def convert(self, lipdpath):
         self.lipd_csvs = {}
         for csvpath, _ in csvs:
             csvname = os.path.basename(csvpath)
-            self.lipd_csvs[csvname] = pd.read_csv(csvpath, header=None)
-        self._load_lipd_json_to_graph(jsonpath)
+            try:
+                self.lipd_csvs[csvname] = pd.read_csv(csvpath, header=None)
+            except:
+                # If normal load doesn't work, try to detect the number of columns and load it that way
+                print(f"WARNING: CSV file '{csvname}' might have inconsistent number of columns !!\nDetecting number of columns to load ..\n")
+                self.lipd_csvs[csvname] = self._detect_columns_and_load(csvpath)
+
+        self._load_lipd_json_to_graph(jsonpath)
+
+    def _detect_columns_and_load(self, filename):
+        # detect number of columns
+        num_columns=0
+        with open(filename) as f:
+            for line in f.readlines():
+                num = len(line.split(','))
+                if num > num_columns:
+                    num_columns=num
+        # load with pre-determined number of columns
+        df=pd.read_csv(filename,names=range(num_columns))
+        return df
 
     def serialize(self, topath, type="rdf"):