tests done docs done remaining others & nits

huggingface · Mar 18, 2023 · d6f6f7e · d6f6f7e
1 parent 7a054bf
commit d6f6f7e
Show file tree

Hide file tree

Showing 9 changed files with 267 additions and 539 deletions.
diff --git a/docs/source/en/model_doc/pop2piano.mdx b/docs/source/en/model_doc/pop2piano.mdx
@@ -18,8 +18,8 @@ The Pop2Piano model was proposed in [Pop2Piano : Pop Audio-based Piano Cover Gen
 
 The abstract from the paper is the following:
 
-*The piano cover of pop music is widely enjoyed by people. How-
-ever, the generation task of the pop piano cover is still understudied.
+*The piano cover of pop music is widely enjoyed by people. However,
+the generation task of the pop piano cover is still understudied.
 This is partly due to the lack of synchronized {Pop, Piano
 Cover} data pairs, which made it challenging to apply the latest
 data-intensive deep learning-based methods. To leverage the power
@@ -32,6 +32,8 @@ a piano cover from pop audio without melody and chord extraction
 modules. We show that Pop2Piano trained with our dataset can
 generate plausible piano covers.*
 
+<Check on how to implement (show) examples>
+
 Tips:
 
 <INSERT TIPS ABOUT MODEL HERE>
@@ -44,14 +46,14 @@ The original code can be found [here](https://github.com/sweetcocoa/pop2piano).
 
 [[autodoc]] Pop2PianoConfig
 
-## Pop2PianoModel
+## Pop2PianoFeatureExtractor
 
-[[autodoc]] Pop2PianoModel
-    - forward
-    - _mask_input_features
+[[autodoc]] WhisperFeatureExtractor
+    - __call__
 
 ## Pop2PianoForConditionalGeneration
 
 [[autodoc]] Pop2PianoForConditionalGeneration
     - forward
+    - generate
 
diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
@@ -2617,7 +2617,6 @@
         [
             "POP2PIANO_PRETRAINED_MODEL_ARCHIVE_LIST",
             "Pop2PianoForConditionalGeneration",
-            # "Pop2PianoModel",
             "Pop2PianoPreTrainedModel",
         ]
     )
@@ -5863,7 +5862,6 @@
         from .models.pop2piano import (
             POP2PIANO_PRETRAINED_MODEL_ARCHIVE_LIST,
             Pop2PianoForConditionalGeneration,
-            # Pop2PianoModel,
             Pop2PianoPreTrainedModel,
         )
         from .models.x_clip import (

diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py
@@ -306,7 +306,6 @@
             ("wav2vec2-conformer", ("Wav2Vec2CTCTokenizer", None)),
             ("wav2vec2_phoneme", ("Wav2Vec2PhonemeCTCTokenizer", None)),
             ("whisper", ("WhisperTokenizer", "WhisperTokenizerFast" if is_tokenizers_available() else None)),
-            ("pop2piano", ("WhisperTokenizer", "WhisperTokenizerFast" if is_tokenizers_available() else None)),
             ("xclip", ("CLIPTokenizer", "CLIPTokenizerFast" if is_tokenizers_available() else None)),
             (
                 "xglm",

diff --git a/src/transformers/models/pop2piano/configuration_pop2piano.py b/src/transformers/models/pop2piano/configuration_pop2piano.py
@@ -23,7 +23,7 @@
 logger = logging.get_logger(__name__)
 
 POP2PIANO_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-    "sweetcocoa/pop2piano": "https://huggingface.co/sweetcocoa/pop2piano/blob/main/config.json"
+    "susnato/pop2piano_dev": "https://huggingface.co/susnato/pop2piano_dev/blob/main/config.json" # For now
 }
 
 COMPOSER_TO_FEATURE_TOKEN = {'composer1': 2052,
@@ -52,16 +52,17 @@
 # Adapted from transformers.models.t5.configuration_t5.T5Config with T5->Pop2Piano,T5Model->Pop2PianoModel,t5->pop2piano,T5Block->Pop2PianoBlock
 class Pop2PianoConfig(PretrainedConfig):
     r"""
-    This is the configuration class to store the configuration of a [`Pop2PianoModel`]. It is used to instantiate a
-    Pop2Piano model according to the specified arguments, defining the model architecture. Instantiating a configuration
+    This is the configuration class to store the configuration of a [`Pop2PianoForConditionalGeneration`]. It is used to instantiate a
+    Pop2PianoForConditionalGeneration model according to the specified arguments, defining the model architecture. Instantiating a configuration
     with the defaults will yield a similar configuration to that of the Pop2Piano
     [sweetcocoa/pop2piano](https://huggingface.co/sweetcocoa/pop2piano) architecture.
     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
     documentation from [`PretrainedConfig`] for more information.
+
     Arguments:
         vocab_size (`int`, *optional*, defaults to 32128):
-            Vocabulary size of the Pop2Piano model. Defines the number of different tokens that can be represented by the
-            `inputs_ids` passed when calling [`Pop2PianoModel`].
+            Vocabulary size of the Pop2PianoForConditionalGeneration model. Defines the number of different tokens that can be represented by the
+            `inputs_ids` passed when calling [`Pop2PianoForConditionalGeneration`].
         d_model (`int`, *optional*, defaults to 512):
             Size of the encoder layers and the pooler layer.
         d_kv (`int`, *optional*, defaults to 64):
@@ -100,7 +101,7 @@ class Pop2PianoConfig(PretrainedConfig):
             Number of mel filterbanks.
         dense_act_fn (`string`, *optional*, defaults to `"relu"`):
             Type of Activation Function to be used in `Pop2PianoDenseActDense` and in `Pop2PianoDenseGatedActDense`.
-        dataset_sample_rate (`int` *optional*, defaults to 22050):
+        dataset_sampling_rate (`int`, *optional*, defaults to 22050):
             Sample rate of audio signal.
         dataset_mel_is_conditioned (`bool`, *optional*, defaults to `True`):
             Whether to use `ConcatEmbeddingToMel` or not.
@@ -136,7 +137,7 @@ def __init__(
         dense_act_fn="relu",
         dataset_target_length=256,
         dataset_n_bars=2,
-        dataset_sample_rate=22050,
+        dataset_sampling_rate=22050,
         dataset_mel_is_conditioned=True,
         n_fft=4096,
         hop_length=1024,
@@ -171,7 +172,7 @@ def __init__(
         self.composer_to_feature_token = COMPOSER_TO_FEATURE_TOKEN
         self.dataset = {'target_length': dataset_target_length,
                         'n_bars': dataset_n_bars,
-                        'sample_rate': dataset_sample_rate,
+                        'sampling_rate': dataset_sampling_rate,
                         'mel_is_conditioned': dataset_mel_is_conditioned}
         self.n_fft = n_fft
         self.hop_length = hop_length