zh-CN - Chapter 6 finished

huggingface · Aug 1, 2022 · e69fce2 · e69fce2
1 parent aebb46e
commit e69fce2
Show file tree

Hide file tree

Showing 46 changed files with 3,527 additions and 119 deletions.
diff --git a/chapters/de/chapter3/3_tf.mdx b/chapters/de/chapter3/3_tf.mdx
@@ -85,8 +85,7 @@ model.compile(
     metrics=["accuracy"],
 )
 model.fit(
-    tf_train_dataset,
-    validation_data=tf_validation_dataset,
+    tf_train_dataset, validation_data=tf_validation_dataset,
 )
 ```
 

diff --git a/chapters/en/chapter1/3.mdx b/chapters/en/chapter1/3.mdx
@@ -150,9 +150,7 @@ from transformers import pipeline
 
 generator = pipeline("text-generation", model="distilgpt2")
 generator(
-    "In this course, we will teach you how to",
-    max_length=30,
-    num_return_sequences=2,
+    "In this course, we will teach you how to", max_length=30, num_return_sequences=2,
 )
 ```
 

diff --git a/chapters/en/chapter2/2.mdx b/chapters/en/chapter2/2.mdx
@@ -39,10 +39,7 @@ from transformers import pipeline
 
 classifier = pipeline("sentiment-analysis")
 classifier(
-    [
-        "I've been waiting for a HuggingFace course my whole life.",
-        "I hate this so much!",
-    ]
+    ["I've been waiting for a HuggingFace course my whole life.", "I hate this so much!",]
 )
 ```
 

diff --git a/chapters/en/chapter3/3_tf.mdx b/chapters/en/chapter3/3_tf.mdx
@@ -85,8 +85,7 @@ model.compile(
     metrics=["accuracy"],
 )
 model.fit(
-    tf_train_dataset,
-    validation_data=tf_validation_dataset,
+    tf_train_dataset, validation_data=tf_validation_dataset,
 )
 ```
 

diff --git a/chapters/en/chapter5/4.mdx b/chapters/en/chapter5/4.mdx
@@ -88,7 +88,7 @@ Here the `rss` attribute refers to the _resident set size_, which is the fractio
 
 ```py
 print(f"Number of files in dataset : {pubmed_dataset.dataset_size}")
-size_gb = pubmed_dataset.dataset_size / (1024**3)
+size_gb = pubmed_dataset.dataset_size / (1024 ** 3)
 print(f"Dataset size (cache file) : {size_gb:.2f} GB")
 ```
 

diff --git a/chapters/en/chapter6/8.mdx b/chapters/en/chapter6/8.mdx
@@ -404,9 +404,7 @@ Great! Now that we're done, we can save the tokenizer like before, and wrap it i
 from transformers import PreTrainedTokenizerFast
 
 wrapped_tokenizer = PreTrainedTokenizerFast(
-    tokenizer_object=tokenizer,
-    bos_token="<|endoftext|>",
-    eos_token="<|endoftext|>",
+    tokenizer_object=tokenizer, bos_token="<|endoftext|>", eos_token="<|endoftext|>",
 )
 ```
 

diff --git a/chapters/en/chapter7/2.mdx b/chapters/en/chapter7/2.mdx
@@ -413,9 +413,7 @@ Now we can just pass them to the `TFAutoModelForTokenClassification.from_pretrai
 from transformers import TFAutoModelForTokenClassification
 
 model = TFAutoModelForTokenClassification.from_pretrained(
-    model_checkpoint,
-    id2label=id2label,
-    label2id=label2id,
+    model_checkpoint, id2label=id2label, label2id=label2id,
 )
 ```
 
@@ -663,9 +661,7 @@ Now we can just pass them to the `AutoModelForTokenClassification.from_pretraine
 from transformers import AutoModelForTokenClassification
 
 model = AutoModelForTokenClassification.from_pretrained(
-    model_checkpoint,
-    id2label=id2label,
-    label2id=label2id,
+    model_checkpoint, id2label=id2label, label2id=label2id,
 )
 ```
 
@@ -774,10 +770,7 @@ First we need to build the `DataLoader`s from our datasets. We'll reuse our `dat
 from torch.utils.data import DataLoader
 
 train_dataloader = DataLoader(
-    tokenized_datasets["train"],
-    shuffle=True,
-    collate_fn=data_collator,
-    batch_size=8,
+    tokenized_datasets["train"], shuffle=True, collate_fn=data_collator, batch_size=8,
 )
 eval_dataloader = DataLoader(
     tokenized_datasets["validation"], collate_fn=data_collator, batch_size=8
@@ -788,9 +781,7 @@ Next we reinstantiate our model, to make sure we're not continuing the fine-tuni
 
 ```py
 model = AutoModelForTokenClassification.from_pretrained(
-    model_checkpoint,
-    id2label=id2label,
-    label2id=label2id,
+    model_checkpoint, id2label=id2label, label2id=label2id,
 )
 ```
 

diff --git a/chapters/en/chapter7/4.mdx b/chapters/en/chapter7/4.mdx
@@ -795,10 +795,7 @@ from torch.utils.data import DataLoader
 
 tokenized_datasets.set_format("torch")
 train_dataloader = DataLoader(
-    tokenized_datasets["train"],
-    shuffle=True,
-    collate_fn=data_collator,
-    batch_size=8,
+    tokenized_datasets["train"], shuffle=True, collate_fn=data_collator, batch_size=8,
 )
 eval_dataloader = DataLoader(
     tokenized_datasets["validation"], collate_fn=data_collator, batch_size=8

diff --git a/chapters/en/chapter7/5.mdx b/chapters/en/chapter7/5.mdx
@@ -928,8 +928,7 @@ for epoch in range(num_train_epochs):
     for step, batch in enumerate(eval_dataloader):
         with torch.no_grad():
             generated_tokens = accelerator.unwrap_model(model).generate(
-                batch["input_ids"],
-                attention_mask=batch["attention_mask"],
+                batch["input_ids"], attention_mask=batch["attention_mask"],
             )
 
             generated_tokens = accelerator.pad_across_processes(

diff --git a/chapters/en/chapter7/7.mdx b/chapters/en/chapter7/7.mdx
@@ -1029,10 +1029,7 @@ validation_set = validation_dataset.remove_columns(["example_id", "offset_mappin
 validation_set.set_format("torch")
 
 train_dataloader = DataLoader(
-    train_dataset,
-    shuffle=True,
-    collate_fn=default_data_collator,
-    batch_size=8,
+    train_dataset, shuffle=True, collate_fn=default_data_collator, batch_size=8,
 )
 eval_dataloader = DataLoader(
     validation_set, collate_fn=default_data_collator, batch_size=8

diff --git a/chapters/es/chapter1/3.mdx b/chapters/es/chapter1/3.mdx
@@ -153,9 +153,7 @@ from transformers import pipeline
 
 generator = pipeline("text-generation", model="distilgpt2")
 generator(
-    "In this course, we will teach you how to",
-    max_length=30,
-    num_return_sequences=2,
+    "In this course, we will teach you how to", max_length=30, num_return_sequences=2,
 )
 ```
 

diff --git a/chapters/fa/chapter2/2.mdx b/chapters/fa/chapter2/2.mdx
@@ -43,10 +43,7 @@ from transformers import pipeline
 
 classifier = pipeline("sentiment-analysis")
 classifier(
-    [
-        "I've been waiting for a HuggingFace course my whole life.",
-        "I hate this so much!",
-    ]
+    ["I've been waiting for a HuggingFace course my whole life.", "I hate this so much!",]
 )
 ```
 

diff --git a/chapters/hi/chapter1/3.mdx b/chapters/hi/chapter1/3.mdx
@@ -166,9 +166,7 @@ from transformers import pipeline
 
 generator = pipeline("text-generation", model="distilgpt2")
 generator(
-    "In this course, we will teach you how to",
-    max_length=30,
-    num_return_sequences=2,
+    "In this course, we will teach you how to", max_length=30, num_return_sequences=2,
 )
 ```
 

diff --git a/chapters/hi/chapter3/3_tf.mdx b/chapters/hi/chapter3/3_tf.mdx
@@ -85,8 +85,7 @@ model.compile(
     metrics=["accuracy"],
 )
 model.fit(
-    tf_train_dataset,
-    validation_data=tf_validation_dataset,
+    tf_train_dataset, validation_data=tf_validation_dataset,
 )
 ```
 

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
@@ -150,9 +150,7 @@ from transformers import pipeline
 
 generator = pipeline("text-generation", model="distilgpt2")
 generator(
-    "In this course, we will teach you how to",
-    max_length=30,
-    num_return_sequences=2,
+    "In this course, we will teach you how to", max_length=30, num_return_sequences=2,
 )
 ```
 

diff --git a/chapters/ja/chapter7/2.mdx b/chapters/ja/chapter7/2.mdx
@@ -419,9 +419,7 @@ label2id = {v: k for k, v in id2label.items()}
 from transformers import TFAutoModelForTokenClassification
 
 model = TFAutoModelForTokenClassification.from_pretrained(
-    model_checkpoint,
-    id2label=id2label,
-    label2id=label2id,
+    model_checkpoint, id2label=id2label, label2id=label2id,
 )
 ```
 
@@ -685,9 +683,7 @@ label2id = {v: k for k, v in id2label.items()}
 from transformers import AutoModelForTokenClassification
 
 model = AutoModelForTokenClassification.from_pretrained(
-    model_checkpoint,
-    id2label=id2label,
-    label2id=label2id,
+    model_checkpoint, id2label=id2label, label2id=label2id,
 )
 ```
 
@@ -806,10 +802,7 @@ trainer.push_to_hub(commit_message="Training complete")
 from torch.utils.data import DataLoader
 
 train_dataloader = DataLoader(
-    tokenized_datasets["train"],
-    shuffle=True,
-    collate_fn=data_collator,
-    batch_size=8,
+    tokenized_datasets["train"], shuffle=True, collate_fn=data_collator, batch_size=8,
 )
 eval_dataloader = DataLoader(
     tokenized_datasets["validation"], collate_fn=data_collator, batch_size=8
@@ -820,9 +813,7 @@ eval_dataloader = DataLoader(
 
 ```py
 model = AutoModelForTokenClassification.from_pretrained(
-    model_checkpoint,
-    id2label=id2label,
-    label2id=label2id,
+    model_checkpoint, id2label=id2label, label2id=label2id,
 )
 ```
 

diff --git a/chapters/ja/chapter7/4.mdx b/chapters/ja/chapter7/4.mdx
@@ -817,10 +817,7 @@ from torch.utils.data import DataLoader
 
 tokenized_datasets.set_format("torch")
 train_dataloader = DataLoader(
-    tokenized_datasets["train"],
-    shuffle=True,
-    collate_fn=data_collator,
-    batch_size=8,
+    tokenized_datasets["train"], shuffle=True, collate_fn=data_collator, batch_size=8,
 )
 eval_dataloader = DataLoader(
     tokenized_datasets["validation"], collate_fn=data_collator, batch_size=8

diff --git a/chapters/ja/chapter7/5.mdx b/chapters/ja/chapter7/5.mdx
@@ -940,8 +940,7 @@ for epoch in range(num_train_epochs):
     for step, batch in enumerate(eval_dataloader):
         with torch.no_grad():
             generated_tokens = accelerator.unwrap_model(model).generate(
-                batch["input_ids"],
-                attention_mask=batch["attention_mask"],
+                batch["input_ids"], attention_mask=batch["attention_mask"],
             )
 
             generated_tokens = accelerator.pad_across_processes(

diff --git a/chapters/ja/chapter7/7.mdx b/chapters/ja/chapter7/7.mdx
@@ -1039,10 +1039,7 @@ validation_set = validation_dataset.remove_columns(["example_id", "offset_mappin
 validation_set.set_format("torch")
 
 train_dataloader = DataLoader(
-    train_dataset,
-    shuffle=True,
-    collate_fn=default_data_collator,
-    batch_size=8,
+    train_dataset, shuffle=True, collate_fn=default_data_collator, batch_size=8,
 )
 eval_dataloader = DataLoader(
     validation_set, collate_fn=default_data_collator, batch_size=8

diff --git a/chapters/ko/chapter1/3.mdx b/chapters/ko/chapter1/3.mdx
@@ -150,9 +150,7 @@ from transformers import pipeline
 
 generator = pipeline("text-generation", model="distilgpt2")
 generator(
-    "In this course, we will teach you how to",
-    max_length=30,
-    num_return_sequences=2,
+    "In this course, we will teach you how to", max_length=30, num_return_sequences=2,
 )
 ```
 

diff --git a/chapters/pt/chapter1/3.mdx b/chapters/pt/chapter1/3.mdx
@@ -152,9 +152,7 @@ from transformers import pipeline
 
 generator = pipeline("text-generation", model="distilgpt2")
 generator(
-    "In this course, we will teach you how to",
-    max_length=30,
-    num_return_sequences=2,
+    "In this course, we will teach you how to", max_length=30, num_return_sequences=2,
 )
 ```
 

diff --git a/chapters/pt/chapter2/2.mdx b/chapters/pt/chapter2/2.mdx
@@ -39,10 +39,7 @@ from transformers import pipeline
 
 classifier = pipeline("sentiment-analysis")
 classifier(
-    [
-        "I've been waiting for a HuggingFace course my whole life.",
-        "I hate this so much!",
-    ]
+    ["I've been waiting for a HuggingFace course my whole life.", "I hate this so much!",]
 )
 ```
 

diff --git a/chapters/pt/chapter5/4.mdx b/chapters/pt/chapter5/4.mdx
@@ -88,7 +88,7 @@ Aqui o atributo `rss` refere-se ao _tamanho do conjunto residente_, que é a fra
 
 ```py
 print(f"Number of files in dataset : {pubmed_dataset.dataset_size}")
-size_gb = pubmed_dataset.dataset_size / (1024**3)
+size_gb = pubmed_dataset.dataset_size / (1024 ** 3)
 print(f"Dataset size (cache file) : {size_gb:.2f} GB")
 ```
 

diff --git a/chapters/ru/chapter1/3.mdx b/chapters/ru/chapter1/3.mdx
@@ -153,9 +153,7 @@ from transformers import pipeline
 
 generator = pipeline("text-generation", model="distilgpt2")
 generator(
-    "In this course, we will teach you how to",
-    max_length=30,
-    num_return_sequences=2,
+    "In this course, we will teach you how to", max_length=30, num_return_sequences=2,
 )
 ```
 

diff --git a/chapters/ru/chapter2/2.mdx b/chapters/ru/chapter2/2.mdx
@@ -39,10 +39,7 @@ from transformers import pipeline
 
 classifier = pipeline("sentiment-analysis")
 classifier(
-    [
-        "I've been waiting for a HuggingFace course my whole life.",
-        "I hate this so much!",
-    ]
+    ["I've been waiting for a HuggingFace course my whole life.", "I hate this so much!",]
 )
 ```
 

diff --git a/chapters/ru/chapter3/3_tf.mdx b/chapters/ru/chapter3/3_tf.mdx
@@ -85,8 +85,7 @@ model.compile(
     metrics=["accuracy"],
 )
 model.fit(
-    tf_train_dataset,
-    validation_data=tf_validation_dataset,
+    tf_train_dataset, validation_data=tf_validation_dataset,
 )
 ```
 

diff --git a/chapters/th/chapter1/3.mdx b/chapters/th/chapter1/3.mdx
@@ -151,9 +151,7 @@ from transformers import pipeline
 
 generator = pipeline("text-generation", model="distilgpt2")
 generator(
-    "In this course, we will teach you how to",
-    max_length=30,
-    num_return_sequences=2,
+    "In this course, we will teach you how to", max_length=30, num_return_sequences=2,
 )
 ```
 

diff --git a/chapters/th/chapter2/2.mdx b/chapters/th/chapter2/2.mdx
@@ -39,10 +39,7 @@ from transformers import pipeline
 
 classifier = pipeline("sentiment-analysis")
 classifier(
-    [
-        "I've been waiting for a HuggingFace course my whole life.",
-        "I hate this so much!",
-    ]
+    ["I've been waiting for a HuggingFace course my whole life.", "I hate this so much!",]
 )
 ```
 

diff --git a/chapters/th/chapter3/3_tf.mdx b/chapters/th/chapter3/3_tf.mdx
@@ -86,8 +86,7 @@ model.compile(
     metrics=["accuracy"],
 )
 model.fit(
-    tf_train_dataset,
-    validation_data=tf_validation_dataset,
+    tf_train_dataset, validation_data=tf_validation_dataset,
 )
 ```
 

diff --git a/chapters/th/chapter6/8.mdx b/chapters/th/chapter6/8.mdx
@@ -429,9 +429,7 @@ tokenizer.decode(encoding.ids)
 from transformers import PreTrainedTokenizerFast
 
 wrapped_tokenizer = PreTrainedTokenizerFast(
-    tokenizer_object=tokenizer,
-    bos_token="<|endoftext|>",
-    eos_token="<|endoftext|>",
+    tokenizer_object=tokenizer, bos_token="<|endoftext|>", eos_token="<|endoftext|>",
 )
 ```