-
Notifications
You must be signed in to change notification settings - Fork 0
/
pharmaconer_deploy_parameters.ini
executable file
·131 lines (101 loc) · 7.29 KB
/
pharmaconer_deploy_parameters.ini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#----- Possible modes of operation -----------------------------------------------------------------------------------------------------------------#
# training mode (from scratch): set train_model to True, and use_pretrained_model to False (if training from scratch). #
# Must have train and valid sets in the dataset_text_folder, and test and deployment sets are optional. #
# training mode (from pretrained model): set train_model to True, and use_pretrained_model to True (if training from a pretrained model). #
# Must have train and valid sets in the dataset_text_folder, and test and deployment sets are optional. #
# prediction mode (using pretrained model): set train_model to False, and use_pretrained_model to True. #
# Must have either a test set or a deployment set. #
# NOTE: Whenever use_pretrained_model is set to True, pretrained_model_folder must be set to the folder containing the pretrained model to use, and #
# model.ckpt, dataset.pickle and parameters.ini must exist in the same folder as the checkpoint file. #
#---------------------------------------------------------------------------------------------------------------------------------------------------#
[mode]
# At least one of use_pretrained_model and train_model must be set to True.
train_model = False
use_pretrained_model = True
pretrained_model_folder = /home/jordiae/Documents/PharmacoNERTask/FarmacoNER/src/CustomNeuroNER/trained_models/phamarconer_deploy_test
# pretrained_model_folder = /srv/PharmaCoNERTaggerDemo/PlanTL-PharmacoNER/trained_models/phamarconer_deploy_test
[dataset]
dataset_text_folder =
# main_evaluation_mode should be either 'conll', 'bio', 'token', or 'binary'. ('conll' is entity-based)
# It determines which metric to use for early stopping, displaying during training, and plotting F1-score vs. epoch.
main_evaluation_mode = conll
output_folder = /home/jordiae/Documents/PharmacoNERTask/FarmacoNER/task/deploy_test_output2
# output_folder = /srv/PharmaCoNERTaggerDemo/demos_output
#---------------------------------------------------------------------------------------------------------------------#
# The parameters below are for advanced users. Their default values should yield good performance in most cases. #
#---------------------------------------------------------------------------------------------------------------------#
[ann]
use_character_lstm = True
character_embedding_dimension = 25
character_lstm_hidden_state_dimension = 25
use_pos = False
# In order to use random initialization instead, set token_pretrained_embedding_filepath to empty string, as below:
# token_pretrained_embedding_filepath =
token_pretrained_embedding_filepath =
token_embedding_dimension = 300
token_lstm_hidden_state_dimension = 300
use_crf = True
[training]
patience = 10
maximum_number_of_epochs = 100
# optimizer should be either 'sgd', 'adam', or 'adadelta'
optimizer = sgd
learning_rate = 0.005
# gradients will be clipped above |gradient_clipping_value| and below -|gradient_clipping_value|, if gradient_clipping_value is non-zero
# (set to 0 to disable gradient clipping)
gradient_clipping_value = 5.0
# dropout_rate should be between 0 and 1
dropout_rate = 0.5
# Upper bound on the number of CPU threads NeuroNER will use
number_of_cpu_threads = 48
# Upper bound on the number of GPU NeuroNER will use
# If number_of_gpus > 0, you need to have installed tensorflow-gpu
number_of_gpus = 0
[advanced]
# experiment_name = 5baseline_fasttext_scielo_wikipedia
# tagging_format should be either 'bioes' or 'bio'
tagging_format = bioes
# tokenizer should be either 'spacy' or 'stanford'. The tokenizer is only used when the original data is provided only in BRAT format.
# - 'spacy' refers to spaCy (https://spacy.io). To install spacy: pip install -U spacy
# - 'stanford' refers to Stanford CoreNLP (https://stanfordnlp.github.io/CoreNLP/). Stanford CoreNLP is written in Java: to use it one has to start a
# Stanford CoreNLP server, which can tokenize sentences given on the fly. Stanford CoreNLP is portable, which means that it can be run
# without any installation.
# To download Stanford CoreNLP: https://stanfordnlp.github.io/CoreNLP/download.html
# To run Stanford CoreNLP, execute in the terminal: `java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9000 -timeout 50000`
# By default Stanford CoreNLP is in English. To use it in other languages, see: https://stanfordnlp.github.io/CoreNLP/human-languages.html
# Stanford CoreNLP 3.6.0 and higher requires Java 8. We have tested NeuroNER with Stanford CoreNLP 3.6.0.
tokenizer = spacy
# spacylanguage should be either 'de' (German), 'en' (English) or 'fr' (French). (https://spacy.io/docs/api/language-models)
# To install the spaCy language: `python -m spacy.de.download`; or `python -m spacy.en.download`; or `python -m spacy.fr.download`
spacylanguage = es
# If remap_unknown_tokens is set to True, map to UNK any token that hasn't been seen in neither the training set nor the pre-trained token embeddings.
remap_unknown_tokens_to_unk = True
# If load_only_pretrained_token_embeddings is set to True, then token embeddings will only be loaded if it exists in token_pretrained_embedding_filepath
# or in pretrained_model_checkpoint_filepath, even for the training set.
load_only_pretrained_token_embeddings = False
# If load_all_pretrained_token_embeddings is set to True, then all pretrained token embeddings will be loaded even for the tokens that do not appear in the dataset.
load_all_pretrained_token_embeddings = False
# If check_for_lowercase is set to True, the lowercased version of each token will also be checked when loading the pretrained embeddings.
# For example, if the token 'Boston' does not exist in the pretrained embeddings, then it is mapped to the embedding of its lowercased version 'boston',
# if it exists among the pretrained embeddings.
check_for_lowercase = True
# If check_for_digits_replaced_with_zeros is set to True, each token with digits replaced with zeros will also be checked when loading pretrained embeddings.
# For example, if the token '123-456-7890' does not exist in the pretrained embeddings, then it is mapped to the embedding of '000-000-0000',
# if it exists among the pretrained embeddings.
# If both check_for_lowercase and check_for_digits_replaced_with_zeros are set to True, then the lowercased version is checked before the digit-zeroed version.
check_for_digits_replaced_with_zeros = True
# If freeze_token_embeddings is set to True, token embedding will remain frozen (not be trained).
freeze_token_embeddings = False
freeze_pos = False
# If debug is set to True, only 200 lines will be loaded for each split of the dataset.
debug = False
verbose = False
# plot_format specifies the format of the plots generated by NeuroNER. It should be either 'png' or 'pdf'.
plot_format = pdf
# specify which layers to reload from the pretrained model
reload_character_embeddings = True
reload_character_lstm = True
reload_token_embeddings = True
reload_token_lstm = True
reload_feedforward = True
reload_crf = True