diff --git a/src/transformers/models/roberta/configuration_roberta.py b/src/transformers/models/roberta/configuration_roberta.py
index 3025fe2833d904..f82033f4588fde 100644
--- a/src/transformers/models/roberta/configuration_roberta.py
+++ b/src/transformers/models/roberta/configuration_roberta.py
@@ -46,7 +46,7 @@ class RobertaConfig(PretrainedConfig):
 
 
     Args:
-        vocab_size (`int`, *optional*, defaults to 30522):
+        vocab_size (`int`, *optional*, defaults to 50265):
             Vocabulary size of the RoBERTa model. Defines the number of different tokens that can be represented by
             the `inputs_ids` passed when calling [`RobertaModel`] or [`TFRobertaModel`].
         hidden_size (`int`, *optional*, defaults to 768):
@@ -105,7 +105,7 @@ class RobertaConfig(PretrainedConfig):
 
     def __init__(
         self,
-        vocab_size=30522,
+        vocab_size=50265,
         hidden_size=768,
         num_hidden_layers=12,
         num_attention_heads=12,
diff --git a/src/transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py b/src/transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py
index 49f92586c1b732..fca6763f274eab 100644
--- a/src/transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py
+++ b/src/transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py
@@ -45,7 +45,7 @@ class RobertaPreLayerNormConfig(PretrainedConfig):
 
 
     Args:
-        vocab_size (`int`, *optional*, defaults to 30522):
+        vocab_size (`int`, *optional*, defaults to 50265):
             Vocabulary size of the RoBERTa-PreLayerNorm model. Defines the number of different tokens that can be
             represented by the `inputs_ids` passed when calling [`RobertaPreLayerNormModel`] or
             [`TFRobertaPreLayerNormModel`].
@@ -106,7 +106,7 @@ class RobertaPreLayerNormConfig(PretrainedConfig):
 
     def __init__(
         self,
-        vocab_size=30522,
+        vocab_size=50265,
         hidden_size=768,
         num_hidden_layers=12,
         num_attention_heads=12,
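
For context, a minimal sketch of what the new default means for a bare config, assuming a `transformers` install that includes this patch; the `roberta-base` checkpoint name is used only as an illustration of the vocabulary the default is meant to match:

    from transformers import RobertaConfig, RobertaTokenizer

    # With the patched default, an argument-less config now reports the RoBERTa
    # vocabulary size (50265) rather than the BERT value (30522).
    config = RobertaConfig()
    print(config.vocab_size)  # 50265

    # The default now agrees with the pretrained roberta-base tokenizer's vocabulary.
    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    print(len(tokenizer))  # 50265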