[Paper link] [Toponym Detection Demo] [CodeForHuggingFace]
- Clone this repository:
git clone git@github.com:knowledge-computing/geolm.git
cd geolm
- Install packages
conda create -n geolm_env python=3.8 -y
conda activate geolm_env
pip install --upgrade pip
pip install -r requirements.txt
- Change directory to the pre-training script folder
cd src
- Run the pre-training script train_joint.py:
python3 train_joint.py --model_save_dir=OUTPUT_WEIGHT_DIR --pseudo_sentence_dir='../../datasets/osm_pseudo_sent/world/' --nl_sentence_dir='../../datasets/wikidata/world_georelation/joint_v2/' --batch_size=28 --lr=1e-5 --spatial_dist_fill=900 --placename_to_osmid_path='../../datasets/osm_pseudo_sent/name-osmid-dict/placename_to_osmid.json'
- Train with in-domain dataset
cd experiments/toponym_detection/
python3 train_geobert_toponym.py --model_save_dir=OUTPUT_TOPONYM_WEIGHT_DIR --model_option='geobert-base' --model_checkpoint_path=PRETRAINED_MODEL_WEIGHT --lr=1e-5 --epochs=30 --input_file_path=DATASET_PATH
- Test with in-domain dataset
cd experiments/toponym_detection/
python3 test_geobert_toponym.py --model_option='geobert-base' --model_save_path=TOPONYM_MODEL_PATH --input_file_path=DATASET_PATH --spatial_dist_fill=90000
python3 multi_link_geonames.py --model_name='joint-base' --query_dataset_path=DATASET_PATH --ref_dataset_path=CANDIDATES_FILE_PATH --distance_norm_factor=100 --spatial_dist_fill=90000 --spatial_bert_weight_dir=PRETRAINED_WEIGHT_DIR --spatial_bert_weight_name=PRETRAINED_WEIGHT_FILE --out_dir=OUTPUT_FOLDER
- Train with in-domain dataset
python3 train_cls_joint.py --lr=1e-5 --sep_between_neighbors --bert_option='bert-base' --with_type --mlm_checkpoint_path=PRETRAINED_MODEL_PATH --epochs=30 --max_token_len=512 --model_save_dir=OUTPUT_TYPING_WEIGHT_DIR --spatial_dist_fill=90000
- Test with in-domain dataset
python3 test_cls_joint.py --sep_between_neighbors --bert_option='bert-base' --with_type --checkpoint_path=TYPING_WEIGHT_PATH
@article{li2023geolm,
title={GeoLM: Empowering Language Models for Geospatially Grounded Language Understanding},
author={Li, Zekun and Zhou, Wenxuan and Chiang, Yao-Yi and Chen, Muhao},
journal={arXiv preprint arXiv:2310.14478},
year={2023}
}
CC BY-NC 4.0