From 2b1bf4d7897857575ae24d201e8282a174a9ec1e Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 17:43:47 +0100
Subject: [PATCH 001/127] Create it

---
 chapters/it | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 chapters/it

diff --git a/chapters/it b/chapters/it
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/chapters/it
@@ -0,0 +1 @@
+

From b1fce39058e59b80ea2f0faab081bd93bf2ab930 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 17:44:40 +0100
Subject: [PATCH 002/127] Delete it

---
 chapters/it | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 chapters/it

diff --git a/chapters/it b/chapters/it
deleted file mode 100644
index 8b1378917..000000000
--- a/chapters/it
+++ /dev/null
@@ -1 +0,0 @@
-

From 73bc36f3edd393f6afa374a4319778c807d59a45 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 19:28:39 +0100
Subject: [PATCH 003/127] Create 1.mdx

---
 chapters/it/chapter0/1.mdx | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 chapters/it/chapter0/1.mdx

diff --git a/chapters/it/chapter0/1.mdx b/chapters/it/chapter0/1.mdx
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/chapters/it/chapter0/1.mdx
@@ -0,0 +1 @@
+

From 71679057823d4c38b879304aeb9ae5963592d68b Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 19:29:08 +0100
Subject: [PATCH 004/127] Create test

---
 chapters/it/chapter1/test | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 chapters/it/chapter1/test

diff --git a/chapters/it/chapter1/test b/chapters/it/chapter1/test
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/chapters/it/chapter1/test
@@ -0,0 +1 @@
+

From 5eded3ceb627e2d5c58866c93f90c4a4f485d76f Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 19:29:34 +0100
Subject: [PATCH 005/127] Create test

---
 chapters/it/chapter2/test | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 chapters/it/chapter2/test

diff --git a/chapters/it/chapter2/test b/chapters/it/chapter2/test
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/chapters/it/chapter2/test
@@ -0,0 +1 @@
+

From 5b60357cbf3a40094994a07a3344a69a6c1913c8 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 19:30:02 +0100
Subject: [PATCH 006/127] Delete 1.mdx

---
 chapters/it/chapter0/1.mdx | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 chapters/it/chapter0/1.mdx

diff --git a/chapters/it/chapter0/1.mdx b/chapters/it/chapter0/1.mdx
deleted file mode 100644
index 8b1378917..000000000
--- a/chapters/it/chapter0/1.mdx
+++ /dev/null
@@ -1 +0,0 @@
-

From 6313f0c70fac079631b27ef04e4c71c5730c0440 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 19:30:26 +0100
Subject: [PATCH 007/127] Create test

---
 chapters/it/chapter0/test | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 chapters/it/chapter0/test

diff --git a/chapters/it/chapter0/test b/chapters/it/chapter0/test
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/chapters/it/chapter0/test
@@ -0,0 +1 @@
+

From 1a8ead49ba4006b9df77a8f89faa8cae37397e70 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 19:31:11 +0100
Subject: [PATCH 008/127] Add files via upload

---
 chapters/it/chapter0/1.mdx | 110 +++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
 create mode 100644 chapters/it/chapter0/1.mdx

diff --git a/chapters/it/chapter0/1.mdx b/chapters/it/chapter0/1.mdx
new file mode 100644
index 000000000..6ab7c8e23
--- /dev/null
+++ b/chapters/it/chapter0/1.mdx
@@ -0,0 +1,110 @@
+# Introduction
+
+Welcome to the Hugging Face course! This introduction will guide you through setting up a working environment. If you're just starting the course, we recommend you first take a look at [Chapter 1](/course/chapter1), then come back and set up your environment so you can try the code yourself.
+
+All the libraries that we'll be using in this course are available as Python packages, so here we'll show you how to set up a Python environment and install the specific libraries you'll need.
+
+We'll cover two ways of setting up your working environment, using a Colab notebook or a Python virtual environment. Feel free to choose the one that resonates with you the most. For beginners, we strongly recommend that you get started by using a Colab notebook.
+
+Note that we will not be covering the Windows system. If you're running on Windows, we recommend following along using a Colab notebook. If you're using a Linux distribution or macOS, you can use either approach described here.
+
+Most of the course relies on you having a Hugging Face account. We recommend creating one now: [create an account](https://huggingface.co/join).
+
+## Using a Google Colab notebook
+
+Using a Colab notebook is the simplest possible setup; boot up a notebook in your browser and get straight to coding! 
+
+If you're not familiar with Colab, we recommend you start by following the [introduction](https://colab.research.google.com/notebooks/intro.ipynb). Colab allows you to use some accelerating hardware, like GPUs or TPUs, and it is free for smaller workloads.
+
+Once you're comfortable moving around in Colab, create a new notebook and get started with the setup:
+
+<div class="flex justify-center">
+<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter0/new_colab.png" alt="An empty colab notebook" width="80%"/>
+</div>
+
+The next step is to install the libraries that we'll be using in this course. We'll use `pip` for the installation, which is the package manager for Python. In notebooks, you can run system commands by preceding them with the `!` character, so you can install the 🤗 Transformers library as follows:
+
+```
+!pip install transformers
+```
+
+You can make sure the package was correctly installed by importing it within your Python runtime:
+
+```
+import transformers
+```
+
+<div class="flex justify-center">
+<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter0/install.gif" alt="A gif showing the result of the two commands above: installation and import" width="80%"/>
+</div>
+
+This installs a very light version of 🤗 Transformers. In particular, no specific machine learning frameworks (like PyTorch or TensorFlow) are installed. Since we'll be using a lot of different features of the library, we recommend installing the development version, which comes with all the required dependencies for pretty much any imaginable use case:
+
+```
+!pip install transformers[sentencepiece]
+```
+
+This will take a bit of time, but then you'll be ready to go for the rest of the course!
+
+## Using a Python virtual environment
+
+If you prefer to use a Python virtual environment, the first step is to install Python on your system. We recommend following [this guide](https://realpython.com/installing-python/) to get started.
+
+Once you have Python installed, you should be able to run Python commands in your terminal. You can start by running the following command to ensure that it is correctly installed before proceeding to the next steps: `python --version`. This should print out the Python version now available on your system.
+
+When running a Python command in your terminal, such as `python --version`, you should think of the program running your command as the "main" Python on your system. We recommend keeping this main installation free of any packages, and using it to create separate environments for each application you work on — this way, each application can have its own dependencies and packages, and you won't need to worry about potential compatibility issues with other applications.
+
+In Python this is done with [*virtual environments*](https://docs.python.org/3/tutorial/venv.html), which are self-contained directory trees that each contain a Python installation with a particular Python version alongside all the packages the application needs. Creating such a virtual environment can be done with a number of different tools, but we'll use the official Python package for that purpose, which is called [`venv`](https://docs.python.org/3/library/venv.html#module-venv).
+
+First, create the directory you'd like your application to live in — for example, you might want to make a new directory called *transformers-course* at the root of your home directory:
+
+```
+mkdir ~/transformers-course
+cd ~/transformers-course
+```
+
+From inside this directory, create a virtual environment using the Python `venv` module:
+
+```
+python -m venv .env
+```
+
+You should now have a directory called *.env* in your otherwise empty folder:
+
+```
+ls -a
+```
+
+```out
+.      ..    .env
+```
+
+You can jump in and out of your virtual environment with the `activate` and `deactivate` scripts:
+
+```
+# Activate the virtual environment
+source .env/bin/activate
+
+# Deactivate the virtual environment
+deactivate
+```
+
+You can make sure that the environment is activated by running the `which python` command: if it points to the virtual environment, then you have successfully activated it!
+
+```
+which python
+```
+
+```out
+/home/<user>/transformers-course/.env/bin/python
+```
+
+### Installing dependencies
+
+As in the previous section on using Google Colab instances, you'll now need to install the packages required to continue. Again, you can install the development version of 🤗 Transformers using the `pip` package manager:
+
+```
+pip install "transformers[sentencepiece]"
+```
+
+You're now all set up and ready to go!

From 6e713c17d6ab4ee7985469ba25251e88f3ae562a Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 19:31:35 +0100
Subject: [PATCH 009/127] Delete test

---
 chapters/it/chapter0/test | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 chapters/it/chapter0/test

diff --git a/chapters/it/chapter0/test b/chapters/it/chapter0/test
deleted file mode 100644
index 8b1378917..000000000
--- a/chapters/it/chapter0/test
+++ /dev/null
@@ -1 +0,0 @@
-

From c79c8693eeda20819f83239979cc398e4eacfad8 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 19:32:13 +0100
Subject: [PATCH 010/127] Add files via upload

---
 chapters/it/chapter1/1.mdx  |  52 ++++++
 chapters/it/chapter1/10.mdx | 254 ++++++++++++++++++++++++++++
 chapters/it/chapter1/2.mdx  |  21 +++
 chapters/it/chapter1/3.mdx  | 329 ++++++++++++++++++++++++++++++++++++
 chapters/it/chapter1/4.mdx  | 171 +++++++++++++++++++
 chapters/it/chapter1/5.mdx  |  17 ++
 chapters/it/chapter1/6.mdx  |  16 ++
 chapters/it/chapter1/7.mdx  |  16 ++
 chapters/it/chapter1/8.mdx  |  32 ++++
 chapters/it/chapter1/9.mdx  |  11 ++
 10 files changed, 919 insertions(+)
 create mode 100644 chapters/it/chapter1/1.mdx
 create mode 100644 chapters/it/chapter1/10.mdx
 create mode 100644 chapters/it/chapter1/2.mdx
 create mode 100644 chapters/it/chapter1/3.mdx
 create mode 100644 chapters/it/chapter1/4.mdx
 create mode 100644 chapters/it/chapter1/5.mdx
 create mode 100644 chapters/it/chapter1/6.mdx
 create mode 100644 chapters/it/chapter1/7.mdx
 create mode 100644 chapters/it/chapter1/8.mdx
 create mode 100644 chapters/it/chapter1/9.mdx

diff --git a/chapters/it/chapter1/1.mdx b/chapters/it/chapter1/1.mdx
new file mode 100644
index 000000000..b3f26714a
--- /dev/null
+++ b/chapters/it/chapter1/1.mdx
@@ -0,0 +1,52 @@
+# Introduction
+
+## Welcome to the 🤗 Course!
+
+<Youtube id="00GKzGyWFEs" />
+
+This course will teach you about natural language processing (NLP) using libraries from the [Hugging Face](https://huggingface.co/) ecosystem — [🤗 Transformers](https://github.com/huggingface/transformers), [🤗 Datasets](https://github.com/huggingface/datasets), [🤗 Tokenizers](https://github.com/huggingface/tokenizers), and [🤗 Accelerate](https://github.com/huggingface/accelerate) — as well as the [Hugging Face Hub](https://huggingface.co/models). It's completely free and without ads.
+
+
+## What to expect?
+
+Here is a brief overview of the course:
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/summary.svg" alt="Brief overview of the chapters of the course.">
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/summary-dark.svg" alt="Brief overview of the chapters of the course.">
+</div>
+
+- Chapters 1 to 4 provide an introduction to the main concepts of the 🤗 Transformers library. By the end of this part of the course, you will be familiar with how Transformer models work and will know how to use a model from the [Hugging Face Hub](https://huggingface.co/models), fine-tune it on a dataset, and share your results on the Hub!
+- Chapters 5 to 8 teach the basics of 🤗 Datasets and 🤗 Tokenizers before diving into classic NLP tasks. By the end of this part, you will be able to tackle the most common NLP problems by yourself.
+- Chapters 9 to 12 go beyond NLP, and explore how Transformer models can be used to tackle tasks in speech processing and computer vision. Along the way, you'll learn how to build and share demos of your models, and optimize them for production environments. By the end of this part, you will be ready to apply 🤗 Transformers to (almost) any machine learning problem!
+
+This course:
+
+* Requires a good knowledge of Python
+* Is better taken after an introductory deep learning course, such as [fast.ai's](https://www.fast.ai/) [Practical Deep Learning for Coders](https://course.fast.ai/) or one of the programs developed by [DeepLearning.AI](https://www.deeplearning.ai/)
+* Does not expect prior [PyTorch](https://pytorch.org/) or [TensorFlow](https://www.tensorflow.org/) knowledge, though some familiarity with either of those will help
+
+After you've completed this course, we recommend checking out DeepLearning.AI's [Natural Language Processing Specialization](https://www.coursera.org/specializations/natural-language-processing?utm_source=deeplearning-ai&utm_medium=institutions&utm_campaign=20211011-nlp-2-hugging_face-page-nlp-refresh), which covers a wide range of traditional NLP models like naive Bayes and LSTMs that are well worth knowing about!
+
+## Who are we?
+
+About the authors:
+
+**Matthew Carrigan** is a Machine Learning Engineer at Hugging Face. He lives in Dublin, Ireland and previously worked as an ML engineer at Parse.ly and before that as a post-doctoral researcher at Trinity College Dublin. He does not believe we're going to get to AGI by scaling existing architectures, but has high hopes for robot immortality regardless.
+
+**Lysandre Debut** is a Machine Learning Engineer at Hugging Face and has been working on the 🤗 Transformers library since the very early development stages. His aim is to make NLP accessible for everyone by developing tools with a very simple API.
+
+**Sylvain Gugger** is a Research Engineer at Hugging Face and one of the core maintainers of the 🤗 Transformers library. Previously he was a Research Scientist at fast.ai, and he co-wrote _[Deep Learning for Coders with fastai and PyTorch](https://learning.oreilly.com/library/view/deep-learning-for/9781492045519/)_ with Jeremy Howard. The main focus of his research is on making deep learning more accessible, by designing and improving techniques that allow models to train fast on limited resources.
+
+**Merve Noyan** is a developer advocate at Hugging Face, working on developing tools and building content around them to democratize machine learning for everyone.
+
+**Lucile Saulnier** is a machine learning engineer at Hugging Face, developing and supporting the use of open source tools. She is also actively involved in many research projects in the field of Natural Language Processing such as collaborative training and BigScience.
+
+**Lewis Tunstall** is a machine learning engineer at Hugging Face, focused on developing open-source tools and making them accessible to the wider community. He is also a co-author of an upcoming [O’Reilly book on Transformers](https://www.oreilly.com/library/view/natural-language-processing/9781098103231/).
+
+**Leandro von Werra** is a machine learning engineer in the open-source team at Hugging Face and also a co-author of an upcoming [O’Reilly book on Transformers](https://www.oreilly.com/library/view/natural-language-processing/9781098103231/). He has several years of industry experience bringing NLP projects to production by working across the whole machine learning stack.
+
+Are you ready to roll? In this chapter, you will learn:
+* How to use the `pipeline()` function to solve NLP tasks such as text generation and classification
+* About the Transformer architecture
+* How to distinguish between encoder, decoder, and encoder-decoder architectures and use cases
diff --git a/chapters/it/chapter1/10.mdx b/chapters/it/chapter1/10.mdx
new file mode 100644
index 000000000..355cade7f
--- /dev/null
+++ b/chapters/it/chapter1/10.mdx
@@ -0,0 +1,254 @@
+<!-- DISABLE-FRONTMATTER-SECTIONS -->
+
+# End-of-chapter quiz
+
+This chapter covered a lot of ground! Don't worry if you didn't grasp all the details; the next chapters will help you understand how things work under the hood.
+
+First, though, let's test what you learned in this chapter!
+
+
+### 1. Explore the Hub and look for the `roberta-large-mnli` checkpoint. What task does it perform?
+
+
+<Question
+	choices={[
+		{
+			text: "Summarization",
+			explain: "Look again at the <a href=\"https://huggingface.co/roberta-large-mnli\">roberta-large-mnli page</a>."
+		},
+		{
+			text: "Text classification",
+			explain: "More precisely, it classifies if two sentences are logically linked across three labels (contradiction, neutral, entailment) — a task also called <em>natural language inference</em>.",
+			correct: true
+		},
+		{
+			text: "Text generation",
+			explain: "Look again at the <a href=\"https://huggingface.co/roberta-large-mnli\">roberta-large-mnli page</a>."
+		}
+	]}
+/>
+
+### 2. What will the following code return?
+
+```py
+from transformers import pipeline
+
+ner = pipeline("ner", grouped_entities=True)
+ner("My name is Sylvain and I work at Hugging Face in Brooklyn.")
+```
+
+<Question
+	choices={[
+		{
+			text: "It will return classification scores for this sentence, with labels \"positive\" or \"negative\".",
+			explain: "This is incorrect — this would be a <code>sentiment-analysis</code> pipeline."
+		},
+		{
+			text: "It will return a generated text completing this sentence.",
+			explain: "This is incorrect — it would be a <code>text-generation</code> pipeline.",
+		},
+		{
+			text: "It will return the words representing persons, organizations or locations.",
+			explain: "Furthermore, with <code>grouped_entities=True</code>, it will group together the words belonging to the same entity, like \"Hugging Face\".",
+			correct: true
+		}
+	]}
+/>
+
+### 3. What should replace ... in this code sample?
+
+```py
+from transformers import pipeline
+
+filler = pipeline("fill-mask", model="bert-base-cased")
+result = filler("...")
+```
+
+<Question
+	choices={[
+		{
+			text: "This &#60;mask> has been waiting for you.",
+			explain: "This is incorrect. Check out the <code>bert-base-cased</code> model card and try to spot your mistake."
+		},
+		{
+			text: "This [MASK] has been waiting for you.",
+			explain: "Correct! This model's mask token is [MASK].",
+			correct: true
+		},
+		{
+			text: "This man has been waiting for you.",
+			explain: "This is incorrect. This pipeline fills in masked words, so it needs a mask token somewhere."
+		}
+	]}
+/>
+
+### 4. Why will this code fail?
+
+```py
+from transformers import pipeline
+
+classifier = pipeline("zero-shot-classification")
+result = classifier("This is a course about the Transformers library")
+```
+
+<Question
+	choices={[
+		{
+			text: "This pipeline requires that labels be given to classify this text.",
+			explain: "Right — the correct code needs to include <code>candidate_labels=[...]</code>.",
+			correct: true
+		},
+		{
+			text: "This pipeline requires several sentences, not just one.",
+			explain: "This is incorrect, though when properly used, this pipeline can take a list of sentences to process (like all other pipelines)."
+		},
+		{
+			text: "The 🤗 Transformers library is broken, as usual.",
+			explain: "We won't dignify this answer with a comment!"
+		},
+		{
+			text: "This pipeline requires longer inputs; this one is too short.",
+			explain: "This is incorrect. Note that a very long text will be truncated when processed by this pipeline."
+		}
+	]}
+/>
+
+### 5. What does "transfer learning" mean?
+
+<Question
+	choices={[
+		{
+			text: "Transferring the knowledge of a pretrained model to a new model by training it on the same dataset.",
+			explain: "No, that would be two versions of the same model."
+		},
+		{
+			text: "Transferring the knowledge of a pretrained model to a new model by initializing the second model with the first model's weights.",
+			explain: "Correct: when the second model is trained on a new task, it <em>transfers</em> the knowledge of the first model.",
+			correct: true
+		},
+		{
+			text: "Transferring the knowledge of a pretrained model to a new model by building the second model with the same architecture as the first model.",
+			explain: "The architecture is just the way the model is built; there is no knowledge shared or transferred in this case."
+		}
+	]}
+/>
+
+### 6. True or false? A language model usually does not need labels for its pretraining.
+
+
+<Question
+	choices={[
+		{
+			text: "True",
+			explain: "The pretraining is usually <em>self-supervised</em>, which means the labels are created automatically from the inputs (like predicting the next word or filling in some masked words).",
+			correct: true
+		},
+		{
+			text: "False",
+			explain: "This is not the correct answer."
+		}
+	]}
+/>
+
+### 7. Select the sentence that best describes the terms "model," "architecture," and "weights."
+
+<Question
+	choices={[
+		{
+			text: "If a model is a building, its architecture is the blueprint and the weights are the people living inside.",
+			explain: "Following this metaphor, the weights would be the bricks and other materials used to construct the building."
+		},
+		{
+			text: "An architecture is a map to build a model and its weights are the cities represented on the map.",
+			explain: "The problem with this metaphor is that a map usually represents one existing reality (there is only one city in France named Paris). For a given architecture, multiple weights are possible."
+		},
+		{
+			text: "An architecture is a succession of mathematical functions to build a model and its weights are those functions' parameters.",
+			explain: "The same set of mathematical functions (architecture) can be used to build different models by using different parameters (weights).",
+			correct: true
+		}
+	]}
+/>
+
+
+### 8. Which of these types of models would you use for completing prompts with generated text?
+
+<Question
+	choices={[
+		{
+			text: "An encoder model",
+			explain: "An encoder model generates a representation of the whole sentence that is better suited for tasks like classification."
+		},
+		{
+			text: "A decoder model",
+			explain: "Decoder models are perfectly suited for text generation from a prompt.",
+			correct: true
+		},
+		{
+			text: "A sequence-to-sequence model",
+			explain: "Sequence-to-sequence models are better suited for tasks where you want to generate sentences in relation to the input sentences, not a given prompt."
+		}
+	]}
+/>
+
+### 9. Which of these types of models would you use for summarizing texts?
+
+<Question
+	choices={[
+		{
+			text: "An encoder model",
+			explain: "An encoder model generates a representation of the whole sentence that is better suited for tasks like classification."
+		},
+		{
+			text: "A decoder model",
+			explain: "Decoder models are good for generating output text (like summaries), but they don't have the ability to exploit a context like the whole text to summarize."
+		},
+		{
+			text: "A sequence-to-sequence model",
+			explain: "Sequence-to-sequence models are perfectly suited for a summarization task.",
+			correct: true
+		}
+	]}
+/>
+
+### 10. Which of these types of models would you use for classifying text inputs according to certain labels?
+
+<Question
+	choices={[
+		{
+			text: "An encoder model",
+			explain: "An encoder model generates a representation of the whole sentence which is perfectly suited for a task like classification.",
+			correct: true
+		},
+		{
+			text: "A decoder model",
+			explain: "Decoder models are good for generating output texts, not extracting a label out of a sentence."
+		},
+		{
+			text: "A sequence-to-sequence model",
+			explain: "Sequence-to-sequence models are better suited for tasks where you want to generate text based on an input sentence, not a label.",
+		}
+	]}
+/>
+
+### 11. What possible source can the bias observed in a model have?
+
+<Question
+	choices={[
+		{
+			text: "The model is a fine-tuned version of a pretrained model and it picked up its bias from it.",
+			explain: "When applying transfer learning, the bias in the pretrained model used persists in the fine-tuned model.",
+			correct: true
+		},
+		{
+			text: "The data the model was trained on is biased.",
+			explain: "This is the most obvious source of bias, but not the only one.",
+			correct: true
+		},
+		{
+			text: "The metric the model was optimizing for is biased.",
+			explain: "A less obvious source of bias is the way the model is trained. Your model will blindly optimize for whatever metric you chose, without any second thoughts.",
+			correct: true
+		}
+	]}
+/>
diff --git a/chapters/it/chapter1/2.mdx b/chapters/it/chapter1/2.mdx
new file mode 100644
index 000000000..4e4aecc1a
--- /dev/null
+++ b/chapters/it/chapter1/2.mdx
@@ -0,0 +1,21 @@
+# Natural Language Processing
+
+Before jumping into Transformer models, let's do a quick overview of what natural language processing is and why we care about it.
+
+## What is NLP?
+
+NLP is a field of linguistics and machine learning focused on understanding everything related to human language. The aim of NLP tasks is not only to understand single words individually, but to be able to understand the context of those words.
+
+The following is a list of common NLP tasks, with some examples of each:
+
+- **Classifying whole sentences**: Getting the sentiment of a review, detecting if an email is spam, determining if a sentence is grammatically correct or whether two sentences are logically related or not
+- **Classifying each word in a sentence**: Identifying the grammatical components of a sentence (noun, verb, adjective), or the named entities (person, location, organization)
+- **Generating text content**: Completing a prompt with auto-generated text, filling in the blanks in a text with masked words
+- **Extracting an answer from a text**: Given a question and a context, extracting the answer to the question based on the information provided in the context
+- **Generating a new sentence from an input text**: Translating a text into another language, summarizing a text
+
+NLP isn't limited to written text though. It also tackles complex challenges in speech recognition and computer vision, such as generating a transcript of an audio sample or a description of an image.
+
+## Why is it challenging?
+
+Computers don't process information in the same way as humans. For example, when we read the sentence "I am hungry," we can easily understand its meaning. Similarly, given two sentences such as "I am hungry" and "I am sad," we're able to easily determine how similar they are. For machine learning (ML) models, such tasks are more difficult. The text needs to be processed in a way that enables the model to learn from it. And because language is complex, we need to think carefully about how this processing must be done. There has been a lot of research done on how to represent text, and we will look at some methods in the next chapter.
diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
new file mode 100644
index 000000000..ac22e7e8f
--- /dev/null
+++ b/chapters/it/chapter1/3.mdx
@@ -0,0 +1,329 @@
+# Transformers, what can they do?
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter1/section3.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter1/section3.ipynb"},
+]} />
+
+In this section, we will look at what Transformer models can do and use our first tool from the 🤗 Transformers library: the `pipeline()` function.
+
+<Tip>
+👀 See that <em>Open in Colab</em> button on the top right? Click on it to open a Google Colab notebook with all the code samples of this section. This button will be present in any section containing code examples. 
+
+If you want to run the examples locally, we recommend taking a look at the <a href="/course/chapter0">setup</a>.
+</Tip>
+
+## Transformers are everywhere!
+
+Transformer models are used to solve all kinds of NLP tasks, like the ones mentioned in the previous section. Here are some of the companies and organizations using Hugging Face and Transformer models, who also contribute back to the community by sharing their models:
+
+<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/companies.PNG" alt="Companies using Hugging Face" width="100%">
+
+The [🤗 Transformers library](https://github.com/huggingface/transformers) provides the functionality to create and use those shared models. The [Model Hub](https://huggingface.co/models) contains thousands of pretrained models that anyone can download and use. You can also upload your own models to the Hub!
+
+<Tip>
+⚠️ The Hugging Face Hub is not limited to Transformer models. Anyone can share any kind of models or datasets they want! <a href="https://huggingface.co/join">Create a huggingface.co</a> account to benefit from all available features!
+</Tip>
+
+Before diving into how Transformer models work under the hood, let's look at a few examples of how they can be used to solve some interesting NLP problems.
+
+## Working with pipelines
+
+<Youtube id="tiZFewofSLM" />
+
+The most basic object in the 🤗 Transformers library is the `pipeline()` function. It connects a model with its necessary preprocessing and postprocessing steps, allowing us to directly input any text and get an intelligible answer:
+
+```python
+from transformers import pipeline
+
+classifier = pipeline("sentiment-analysis")
+classifier("I've been waiting for a HuggingFace course my whole life.")
+```
+
+```python out
+[{'label': 'POSITIVE', 'score': 0.9598047137260437}]
+```
+
+We can even pass several sentences!
+
+```python
+classifier(
+    ["I've been waiting for a HuggingFace course my whole life.", "I hate this so much!"]
+)
+```
+
+```python out
+[{'label': 'POSITIVE', 'score': 0.9598047137260437},
+ {'label': 'NEGATIVE', 'score': 0.9994558095932007}]
+```
+
+By default, this pipeline selects a particular pretrained model that has been fine-tuned for sentiment analysis in English. The model is downloaded and cached when you create the `classifier` object. If you rerun the command, the cached model will be used instead and there is no need to download the model again.
+
+There are three main steps involved when you pass some text to a pipeline:
+
+1. The text is preprocessed into a format the model can understand.
+2. The preprocessed inputs are passed to the model.
+3. The predictions of the model are post-processed, so you can make sense of them.
+
+
+Some of the currently [available pipelines](https://huggingface.co/transformers/main_classes/pipelines.html) are:
+
+- `feature-extraction` (get the vector representation of a text)
+- `fill-mask`
+- `ner` (named entity recognition)
+- `question-answering`
+- `sentiment-analysis`
+- `summarization`
+- `text-generation`
+- `translation`
+- `zero-shot-classification`
+
+Let's have a look at a few of these!
+
+## Zero-shot classification
+
+We'll start by tackling a more challenging task where we need to classify texts that haven't been labelled. This is a common scenario in real-world projects because annotating text is usually time-consuming and requires domain expertise. For this use case, the `zero-shot-classification` pipeline is very powerful: it allows you to specify which labels to use for the classification, so you don't have to rely on the labels of the pretrained model. You've already seen how the model can classify a sentence as positive or negative using those two labels — but it can also classify the text using any other set of labels you like.
+
+```python
+from transformers import pipeline
+
+classifier = pipeline("zero-shot-classification")
+classifier(
+    "This is a course about the Transformers library",
+    candidate_labels=["education", "politics", "business"],
+)
+```
+
+```python out
+{'sequence': 'This is a course about the Transformers library',
+ 'labels': ['education', 'business', 'politics'],
+ 'scores': [0.8445963859558105, 0.111976258456707, 0.043427448719739914]}
+```
+
+This pipeline is called _zero-shot_ because you don't need to fine-tune the model on your data to use it. It can directly return probability scores for any list of labels you want!
+
+<Tip>
+
+✏️ **Try it out!** Play around with your own sequences and labels and see how the model behaves.
+
+</Tip>
+
+
+## Text generation
+
+Now let's see how to use a pipeline to generate some text. The main idea here is that you provide a prompt and the model will auto-complete it by generating the remaining text. This is similar to the predictive text feature that is found on many phones. Text generation involves randomness, so it's normal if you don't get the same results as shown below.
+
+```python
+from transformers import pipeline
+
+generator = pipeline("text-generation")
+generator("In this course, we will teach you how to")
+```
+
+```python out
+[{'generated_text': 'In this course, we will teach you how to understand and use '
+                    'data flow and data interchange when handling user data. We '
+                    'will be working with one or more of the most commonly used '
+                    'data flows — data flows of various types, as seen by the '
+                    'HTTP'}]
+```
+
+You can control how many different sequences are generated with the argument `num_return_sequences` and the total length of the output text with the argument `max_length`.
+
+<Tip>
+
+✏️ **Try it out!** Use the `num_return_sequences` and `max_length` arguments to generate two sentences of 15 words each.
+
+</Tip>
+
+
+## Using any model from the Hub in a pipeline
+
+The previous examples used the default model for the task at hand, but you can also choose a particular model from the Hub to use in a pipeline for a specific task — say, text generation. Go to the [Model Hub](https://huggingface.co/models) and click on the corresponding tag on the left to display only the supported models for that task. You should get to a page like [this one](https://huggingface.co/models?pipeline_tag=text-generation).
+
+Let's try the [`distilgpt2`](https://huggingface.co/distilgpt2) model! Here's how to load it in the same pipeline as before:
+
+```python
+from transformers import pipeline
+
+generator = pipeline("text-generation", model="distilgpt2")
+generator(
+    "In this course, we will teach you how to",
+    max_length=30,
+    num_return_sequences=2,
+)
+```
+
+```python out
+[{'generated_text': 'In this course, we will teach you how to manipulate the world and '
+                    'move your mental and physical capabilities to your advantage.'},
+ {'generated_text': 'In this course, we will teach you how to become an expert and '
+                    'practice realtime, and with a hands on experience on both real '
+                    'time and real'}]
+```
+
+You can refine your search for a model by clicking on the language tags, and pick a model that will generate text in another language. The Model Hub even contains checkpoints for multilingual models that support several languages.
+
+Once you select a model by clicking on it, you'll see that there is a widget enabling you to try it directly online. This way you can quickly test the model's capabilities before downloading it.
+
+<Tip>
+
+✏️ **Try it out!** Use the filters to find a text generation model for another language. Feel free to play with the widget and use it in a pipeline!
+
+</Tip>
+
+### The Inference API
+
+All the models can be tested directly through your browser using the Inference API, which is available on the Hugging Face [website](https://huggingface.co/). You can play with the model directly on this page by inputting custom text and watching the model process the input data.
+
+The Inference API that powers the widget is also available as a paid product, which comes in handy if you need it for your workflows. See the [pricing page](https://huggingface.co/pricing) for more details.
+
+## Mask filling
+
+The next pipeline you'll try is `fill-mask`. The idea of this task is to fill in the blanks in a given text:
+
+```python
+from transformers import pipeline
+
+unmasker = pipeline("fill-mask")
+unmasker("This course will teach you all about <mask> models.", top_k=2)
+```
+
+```python out
+[{'sequence': 'This course will teach you all about mathematical models.',
+  'score': 0.19619831442832947,
+  'token': 30412,
+  'token_str': ' mathematical'},
+ {'sequence': 'This course will teach you all about computational models.',
+  'score': 0.04052725434303284,
+  'token': 38163,
+  'token_str': ' computational'}]
+```
+
+The `top_k` argument controls how many possibilities you want to be displayed. Note that here the model fills in the special `<mask>` word, which is often referred to as a *mask token*. Other mask-filling models might have different mask tokens, so it's always good to verify the proper mask word when exploring other models. One way to check it is by looking at the mask word used in the widget.
+
+<Tip>
+
+✏️ **Try it out!** Search for the `bert-base-cased` model on the Hub and identify its mask word in the Inference API widget. What does this model predict for the sentence in our `pipeline` example above?
+
+</Tip>
+
+## Named entity recognition
+
+Named entity recognition (NER) is a task where the model has to find which parts of the input text correspond to entities such as persons, locations, or organizations. Let's look at an example:
+
+```python
+from transformers import pipeline
+
+ner = pipeline("ner", grouped_entities=True)
+ner("My name is Sylvain and I work at Hugging Face in Brooklyn.")
+```
+
+```python out
+[{'entity_group': 'PER', 'score': 0.99816, 'word': 'Sylvain', 'start': 11, 'end': 18}, 
+ {'entity_group': 'ORG', 'score': 0.97960, 'word': 'Hugging Face', 'start': 33, 'end': 45}, 
+ {'entity_group': 'LOC', 'score': 0.99321, 'word': 'Brooklyn', 'start': 49, 'end': 57}
+]
+```
+
+Here the model correctly identified that Sylvain is a person (PER), Hugging Face an organization (ORG), and Brooklyn a location (LOC).
+
+We pass the option `grouped_entities=True` in the pipeline creation function to tell the pipeline to regroup together the parts of the sentence that correspond to the same entity: here the model correctly grouped "Hugging" and "Face" as a single organization, even though the name consists of multiple words. In fact, as we will see in the next chapter, the preprocessing even splits some words into smaller parts. For instance, `Sylvain` is split into four pieces: `S`, `##yl`, `##va`, and `##in`. In the post-processing step, the pipeline successfully regrouped those pieces.
+
+<Tip>
+
+✏️ **Try it out!** Search the Model Hub for a model able to do part-of-speech tagging (usually abbreviated as POS) in English. What does this model predict for the sentence in the example above?
+
+</Tip>
+
+## Question answering
+
+The `question-answering` pipeline answers questions using information from a given context:
+
+```python
+from transformers import pipeline
+
+question_answerer = pipeline("question-answering")
+question_answerer(
+    question="Where do I work?",
+    context="My name is Sylvain and I work at Hugging Face in Brooklyn",
+)
+```
+
+```python out
+{'score': 0.6385916471481323, 'start': 33, 'end': 45, 'answer': 'Hugging Face'}
+```
+
+Note that this pipeline works by extracting information from the provided context; it does not generate the answer.
+
+## Summarization
+
+Summarization is the task of reducing a text into a shorter text while keeping all (or most) of the important aspects referenced in the text. Here's an example:
+
+```python
+from transformers import pipeline
+
+summarizer = pipeline("summarization")
+summarizer(
+    """
+    America has changed dramatically during recent years. Not only has the number of 
+    graduates in traditional engineering disciplines such as mechanical, civil, 
+    electrical, chemical, and aeronautical engineering declined, but in most of 
+    the premier American universities engineering curricula now concentrate on 
+    and encourage largely the study of engineering science. As a result, there 
+    are declining offerings in engineering subjects dealing with infrastructure, 
+    the environment, and related issues, and greater concentration on high 
+    technology subjects, largely supporting increasingly complex scientific 
+    developments. While the latter is important, it should not be at the expense 
+    of more traditional engineering.
+
+    Rapidly developing economies such as China and India, as well as other 
+    industrial countries in Europe and Asia, continue to encourage and advance 
+    the teaching of engineering. Both China and India, respectively, graduate 
+    six and eight times as many traditional engineers as does the United States. 
+    Other industrial countries at minimum maintain their output, while America 
+    suffers an increasingly serious decline in the number of engineering graduates 
+    and a lack of well-educated engineers.
+"""
+)
+```
+
+```python out
+[{'summary_text': ' America has changed dramatically during recent years . The '
+                  'number of engineering graduates in the U.S. has declined in '
+                  'traditional engineering disciplines such as mechanical, civil '
+                  ', electrical, chemical, and aeronautical engineering . Rapidly '
+                  'developing economies such as China and India, as well as other '
+                  'industrial countries in Europe and Asia, continue to encourage '
+                  'and advance engineering .'}]
+```
+
+Like with text generation, you can specify a `max_length` or a `min_length` for the result.
+
+
+## Translation
+
+For translation, you can use a default model if you provide a language pair in the task name (such as `"translation_en_to_fr"`), but the easiest way is to pick the model you want to use on the [Model Hub](https://huggingface.co/models). Here we'll try translating from French to English:
+
+```python
+from transformers import pipeline
+
+translator = pipeline("translation", model="Helsinki-NLP/opus-mt-fr-en")
+translator("Ce cours est produit par Hugging Face.")
+```
+
+```python out
+[{'translation_text': 'This course is produced by Hugging Face.'}]
+```
+
+Like with text generation and summarization, you can specify a `max_length` or a `min_length` for the result.
+
+<Tip>
+
+✏️ **Try it out!** Search for translation models in other languages and try to translate the previous sentence into a few different languages.
+
+</Tip>
+
+The pipelines shown so far are mostly for demonstrative purposes. They were programmed for specific tasks and cannot perform variations of them. In the next chapter, you'll learn what's inside a `pipeline()` function and how to customize its behavior.
diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
new file mode 100644
index 000000000..255d37f6e
--- /dev/null
+++ b/chapters/it/chapter1/4.mdx
@@ -0,0 +1,171 @@
+# How do Transformers work?
+
+In this section, we will take a high-level look at the architecture of Transformer models.
+
+## A bit of Transformer history
+
+Here are some reference points in the (short) history of Transformer models:
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers_chrono.svg" alt="A brief chronology of Transformer models.">
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers_chrono-dark.svg" alt="A brief chronology of Transformer models.">
+</div>
+
+The [Transformer architecture](https://arxiv.org/abs/1706.03762) was introduced in June 2017. The focus of the original research was on translation tasks. This was followed by the introduction of several influential models, including:
+
+- **June 2018**: [GPT](https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf), the first pretrained Transformer model, which was fine-tuned on various NLP tasks and obtained state-of-the-art results
+
+- **October 2018**: [BERT](https://arxiv.org/abs/1810.04805), another large pretrained model, this one designed to produce better summaries of sentences (more on this in the next chapter!)
+
+- **February 2019**: [GPT-2](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf), an improved (and bigger) version of GPT that was not immediately publicly released due to ethical concerns
+
+- **October 2019**: [DistilBERT](https://arxiv.org/abs/1910.01108), a distilled version of BERT that is 60% faster, 40% lighter in memory, and still retains 97% of BERT's performance
+
+- **October 2019**: [BART](https://arxiv.org/abs/1910.13461) and [T5](https://arxiv.org/abs/1910.10683), two large pretrained models using the same architecture as the original Transformer model (the first to do so)
+
+- **May 2020**: [GPT-3](https://arxiv.org/abs/2005.14165), an even bigger version of GPT-2 that is able to perform well on a variety of tasks without the need for fine-tuning (called _zero-shot learning_)
+
+This list is far from comprehensive, and is just meant to highlight a few of the different kinds of Transformer models. Broadly, they can be grouped into three categories:
+
+- GPT-like (also called _auto-regressive_ Transformer models)
+- BERT-like (also called _auto-encoding_ Transformer models) 
+- BART/T5-like (also called _sequence-to-sequence_ Transformer models)
+
+We will dive into these families in more depth later on.
+
+## Transformers are language models
+
+All the Transformer models mentioned above (GPT, BERT, BART, T5, etc.) have been trained as *language models*. This means they have been trained on large amounts of raw text in a self-supervised fashion. Self-supervised learning is a type of training in which the objective is automatically computed from the inputs of the model. That means that humans are not needed to label the data!
+
+This type of model develops a statistical understanding of the language it has been trained on, but it's not very useful for specific practical tasks. Because of this, the general pretrained model then goes through a process called *transfer learning*. During this process, the model is fine-tuned in a supervised way -- that is, using human-annotated labels -- on a given task.
+
+An example of a task is predicting the next word in a sentence having read the *n* previous words. This is called *causal language modeling* because the output depends on the past and present inputs, but not the future ones.
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/causal_modeling.svg" alt="Example of causal language modeling in which the next word from a sentence is predicted.">
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/causal_modeling-dark.svg" alt="Example of causal language modeling in which the next word from a sentence is predicted.">
+</div>
+
+Another example is *masked language modeling*, in which the model predicts a masked word in the sentence.
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/masked_modeling.svg" alt="Example of masked language modeling in which a masked word from a sentence is predicted.">
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/masked_modeling-dark.svg" alt="Example of masked language modeling in which a masked word from a sentence is predicted.">
+</div>
+
+## Transformers are big models
+
+Apart from a few outliers (like DistilBERT), the general strategy to achieve better performance is by increasing the models' sizes as well as the amount of data they are pretrained on.
+
+<div class="flex justify-center">
+<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/model_parameters.png" alt="Number of parameters of recent Transformer models" width="90%">
+</div>
+
+Unfortunately, training a model, especially a large one, requires a large amount of data. This becomes very costly in terms of time and compute resources. It even translates to environmental impact, as can be seen in the following graph.
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/carbon_footprint.svg" alt="The carbon footprint of a large language model.">
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/carbon_footprint-dark.svg" alt="The carbon footprint of a large language model.">
+</div>
+
+<Youtube id="ftWlj4FBHTg"/>
+
+And this is showing a project for a (very big) model led by a team consciously trying to reduce the environmental impact of pretraining. The footprint of running lots of trials to get the best hyperparameters would be even higher.
+
+Imagine if each time a research team, a student organization, or a company wanted to train a model, it did so from scratch. This would lead to huge, unnecessary global costs!
+
+This is why sharing language models is paramount: sharing the trained weights and building on top of already trained weights reduces the overall compute cost and carbon footprint of the community.
+
+
+## Transfer Learning
+
+<Youtube id="BqqfQnyjmgg" />
+
+*Pretraining* is the act of training a model from scratch: the weights are randomly initialized, and the training starts without any prior knowledge.
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/pretraining.svg" alt="The pretraining of a language model is costly in both time and money.">
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/pretraining-dark.svg" alt="The pretraining of a language model is costly in both time and money.">
+</div>
+
+This pretraining is usually done on very large amounts of data. Therefore, it requires a very large corpus of data, and training can take up to several weeks.
+
+*Fine-tuning*, on the other hand, is the training done **after** a model has been pretrained. To perform fine-tuning, you first acquire a pretrained language model, then perform additional training with a dataset specific to your task. Wait -- why not simply train directly for the final task? There are a few reasons:
+
+*  The pretrained model was already trained on a dataset that has some similarities with the fine-tuning dataset. The fine-tuning process is thus able to take advantage of knowledge acquired by the initial model during pretraining (for instance, with NLP problems, the pretrained model will have some kind of statistical understanding of the language you are using for your task). 
+*  Since the pretrained model was already trained on lots of data, the fine-tuning requires way less data to get decent results.
+*  For the same reason, the amount of time and resources needed to get good results is much lower.
+
+For example, one could leverage a pretrained model trained on the English language and then fine-tune it on an arXiv corpus, resulting in a science/research-based model. The fine-tuning will only require a limited amount of data: the knowledge the pretrained model has acquired is "transferred," hence the term *transfer learning*.
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/finetuning.svg" alt="The fine-tuning of a language model is cheaper than pretraining in both time and money.">
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/finetuning-dark.svg" alt="The fine-tuning of a language model is cheaper than pretraining in both time and money.">
+</div>
+
+Fine-tuning a model therefore has lower time, data, financial, and environmental costs. It is also quicker and easier to iterate over different fine-tuning schemes, as the training is less constraining than a full pretraining.
+
+This process will also achieve better results than training from scratch (unless you have lots of data), which is why you should always try to leverage a pretrained model -- one as close as possible to the task you have at hand -- and fine-tune it.
+
+## General architecture
+
+In this section, we'll go over the general architecture of the Transformer model. Don't worry if you don't understand some of the concepts; there are detailed sections later covering each of the components.
+
+<Youtube id="H39Z_720T5s" />
+
+## Introduction
+
+The model is primarily composed of two blocks:
+
+* **Encoder (left)**: The encoder receives an input and builds a representation of it (its features). This means that the model is optimized to acquire understanding from the input.
+* **Decoder (right)**: The decoder uses the encoder's representation (features) along with other inputs to generate a target sequence. This means that the model is optimized for generating outputs.
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers_blocks.svg" alt="Architecture of a Transformer model">
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers_blocks-dark.svg" alt="Architecture of a Transformer model">
+</div>
+
+Each of these parts can be used independently, depending on the task: 
+
+* **Encoder-only models**: Good for tasks that require understanding of the input, such as sentence classification and named entity recognition.
+* **Decoder-only models**: Good for generative tasks such as text generation.
+* **Encoder-decoder models** or **sequence-to-sequence models**: Good for generative tasks that require an input, such as translation or summarization.
+
+We will dive into those architectures independently in later sections.
+
+## Attention layers
+
+A key feature of Transformer models is that they are built with special layers called *attention layers*. In fact, the title of the paper introducing the Transformer architecture was ["Attention Is All You Need"](https://arxiv.org/abs/1706.03762)! We will explore the details of attention layers later in the course; for now, all you need to know is that this layer will tell the model to pay specific attention to certain words in the sentence you passed it (and more or less ignore the others) when dealing with the representation of each word.
+
+To put this into context, consider the task of translating text from English to French. Given the input "You like this course", a translation model will need to also attend to the adjacent word "You" to get the proper translation for the word "like", because in French the verb "like" is conjugated differently depending on the subject. The rest of the sentence, however, is not useful for the translation of that word. In the same vein, when translating "this" the model will also need to pay attention to the word "course", because "this" translates differently depending on whether the associated noun is masculine or feminine. Again, the other words in the sentence will not matter for the translation of "this". With more complex sentences (and more complex grammar rules), the model would need to pay special attention to words that might appear farther away in the sentence to properly translate each word.
+
+The same concept applies to any task associated with natural language: a word by itself has a meaning, but that meaning is deeply affected by the context, which can be any other word (or words) before or after the word being studied.
+
+Now that you have an idea of what attention layers are all about, let's take a closer look at the Transformer architecture.
+
+## The original architecture
+
+The Transformer architecture was originally designed for translation. During training, the encoder receives inputs (sentences) in a certain language, while the decoder receives the same sentences in the desired target language. In the encoder, the attention layers can use all the words in a sentence (since, as we just saw, the translation of a given word can be dependent on what is after as well as before it in the sentence). The decoder, however, works sequentially and can only pay attention to the words in the sentence that it has already translated (so, only the words before the word currently being generated). For example, when we have predicted the first three words of the translated target, we give them to the decoder, which then uses all the inputs of the encoder to try to predict the fourth word.
+
+To speed things up during training (when the model has access to target sentences), the decoder is fed the whole target, but it is not allowed to use future words (if it had access to the word at position 2 when trying to predict the word at position 2, the problem would not be very hard!). For instance, when trying to predict the fourth word, the attention layer will only have access to the words in positions 1 to 3.
+
+The original Transformer architecture looked like this, with the encoder on the left and the decoder on the right:
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers.svg" alt="Architecture of a Transformer model">
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers-dark.svg" alt="Architecture of a Transformer model">
+</div>
+
+Note that the first attention layer in a decoder block pays attention to all (past) inputs to the decoder, but the second attention layer uses the output of the encoder. It can thus access the whole input sentence to best predict the current word. This is very useful as different languages can have grammatical rules that put the words in different orders, or some context provided later in the sentence may be helpful to determine the best translation of a given word.
+
+The *attention mask* can also be used in the encoder/decoder to prevent the model from paying attention to some special words -- for instance, the special padding word used to make all the inputs the same length when batching together sentences.
+
+## Architectures vs. checkpoints
+
+As we dive into Transformer models in this course, you'll see mentions of *architectures* and *checkpoints* as well as *models*. These terms all have slightly different meanings: 
+
+* **Architecture**: This is the skeleton of the model -- the definition of each layer and each operation that happens within the model. 
+* **Checkpoints**: These are the weights that will be loaded in a given architecture.
+* **Model**: This is an umbrella term that isn't as precise as "architecture" or "checkpoint": it can mean both. This course will specify *architecture* or *checkpoint* when it matters to reduce ambiguity.
+
+For example, BERT is an architecture while `bert-base-cased`, a set of weights trained by the Google team for the first release of BERT, is a checkpoint. However, one can say "the BERT model" and "the `bert-base-cased` model."
diff --git a/chapters/it/chapter1/5.mdx b/chapters/it/chapter1/5.mdx
new file mode 100644
index 000000000..1c707033b
--- /dev/null
+++ b/chapters/it/chapter1/5.mdx
@@ -0,0 +1,17 @@
+# Encoder models
+
+<Youtube id="MUqNwgPjJvQ" />
+
+Encoder models use only the encoder of a Transformer model. At each stage, the attention layers can access all the words in the initial sentence. These models are often characterized as having "bi-directional" attention, and are often called *auto-encoding models*.
+
+The pretraining of these models usually revolves around somehow corrupting a given sentence (for instance, by masking random words in it) and tasking the model with finding or reconstructing the initial sentence.
+
+Encoder models are best suited for tasks requiring an understanding of the full sentence, such as sentence classification, named entity recognition (and more generally word classification), and extractive question answering.
+
+Representatives of this family of models include:
+
+- [ALBERT](https://huggingface.co/transformers/model_doc/albert.html)
+- [BERT](https://huggingface.co/transformers/model_doc/bert.html)
+- [DistilBERT](https://huggingface.co/transformers/model_doc/distilbert.html)
+- [ELECTRA](https://huggingface.co/transformers/model_doc/electra.html)
+- [RoBERTa](https://huggingface.co/transformers/model_doc/roberta.html)
diff --git a/chapters/it/chapter1/6.mdx b/chapters/it/chapter1/6.mdx
new file mode 100644
index 000000000..87ad85ec3
--- /dev/null
+++ b/chapters/it/chapter1/6.mdx
@@ -0,0 +1,16 @@
+# Decoder models
+
+<Youtube id="d_ixlCubqQw" />
+
+Decoder models use only the decoder of a Transformer model. At each stage, for a given word the attention layers can only access the words positioned before it in the sentence. These models are often called *auto-regressive models*.
+
+The pretraining of decoder models usually revolves around predicting the next word in the sentence.
+
+These models are best suited for tasks involving text generation.
+
+Representatives of this family of models include:
+
+- [CTRL](https://huggingface.co/transformers/model_doc/ctrl.html)
+- [GPT](https://huggingface.co/transformers/model_doc/gpt.html)
+- [GPT-2](https://huggingface.co/transformers/model_doc/gpt2.html)
+- [Transformer XL](https://huggingface.co/transformers/model_doc/transformerxl.html)
diff --git a/chapters/it/chapter1/7.mdx b/chapters/it/chapter1/7.mdx
new file mode 100644
index 000000000..3639c2a81
--- /dev/null
+++ b/chapters/it/chapter1/7.mdx
@@ -0,0 +1,16 @@
+# Sequence-to-sequence models
+
+<Youtube id="0_4KEb08xrE" />
+
+Encoder-decoder models (also called *sequence-to-sequence models*) use both parts of the Transformer architecture. At each stage, the attention layers of the encoder can access all the words in the initial sentence, whereas the attention layers of the decoder can only access the words positioned before a given word in the input.
+
+The pretraining of these models can be done using the objectives of encoder or decoder models, but usually involves something a bit more complex. For instance, [T5](https://huggingface.co/t5-base) is pretrained by replacing random spans of text (that can contain several words) with a single mask special word, and the objective is then to predict the text that this mask word replaces.
+
+Sequence-to-sequence models are best suited for tasks revolving around generating new sentences depending on a given input, such as summarization, translation, or generative question answering.
+
+Representatives of this family of models include:
+
+- [BART](https://huggingface.co/transformers/model_doc/bart.html)
+- [mBART](https://huggingface.co/transformers/model_doc/mbart.html)
+- [Marian](https://huggingface.co/transformers/model_doc/marian.html)
+- [T5](https://huggingface.co/transformers/model_doc/t5.html)
diff --git a/chapters/it/chapter1/8.mdx b/chapters/it/chapter1/8.mdx
new file mode 100644
index 000000000..90c80665d
--- /dev/null
+++ b/chapters/it/chapter1/8.mdx
@@ -0,0 +1,32 @@
+# Bias and limitations
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter1/section8.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter1/section8.ipynb"},
+]} />
+
+If your intent is to use a pretrained model or a fine-tuned version in production, please be aware that, while these models are powerful tools, they come with limitations. The biggest of these is that, to enable pretraining on large amounts of data, researchers often scrape all the content they can find, taking the best as well as the worst of what is available on the internet. 
+
+To give a quick illustration, let's go back to the example of a `fill-mask` pipeline with the BERT model:
+
+```python
+from transformers import pipeline
+
+unmasker = pipeline("fill-mask", model="bert-base-uncased")
+result = unmasker("This man works as a [MASK].")
+print([r["token_str"] for r in result])
+
+result = unmasker("This woman works as a [MASK].")
+print([r["token_str"] for r in result])
+```
+
+```python out
+['lawyer', 'carpenter', 'doctor', 'waiter', 'mechanic']
+['nurse', 'waitress', 'teacher', 'maid', 'prostitute']
+```
+
+When asked to fill in the missing word in these two sentences, the model gives only one gender-free answer (waiter/waitress). The others are work occupations usually associated with one specific gender -- and yes, prostitute ended up in the top 5 possibilities the model associates with "woman" and "work." This happens even though BERT is one of the rare Transformer models not built by scraping data from all over the internet, but rather using apparently neutral data (it's trained on the [English Wikipedia](https://huggingface.co/datasets/wikipedia) and [BookCorpus](https://huggingface.co/datasets/bookcorpus) datasets). 
+
+When you use these tools, you therefore need to keep in the back of your mind that the original model you are using could very easily generate sexist, racist, or homophobic content. Fine-tuning the model on your data won't make this intrinsic bias disappear.
diff --git a/chapters/it/chapter1/9.mdx b/chapters/it/chapter1/9.mdx
new file mode 100644
index 000000000..4cd91feac
--- /dev/null
+++ b/chapters/it/chapter1/9.mdx
@@ -0,0 +1,11 @@
+# Summary
+
+In this chapter, you saw how to approach different NLP tasks using the high-level `pipeline()` function from 🤗 Transformers. You also saw how to search for and use models in the Hub, as well as how to use the Inference API to test the models directly in your browser.
+
+We discussed how Transformer models work at a high level, and talked about the importance of transfer learning and fine-tuning. A key aspect is that you can use the full architecture or only the encoder or decoder, depending on what kind of task you aim to solve. The following table summarizes this:
+
+| Model           | Examples                                   | Tasks                                                                            |
+|-----------------|--------------------------------------------|----------------------------------------------------------------------------------|
+| Encoder         | ALBERT, BERT, DistilBERT, ELECTRA, RoBERTa | Sentence classification, named entity recognition, extractive question answering |
+| Decoder         | CTRL, GPT, GPT-2, Transformer XL           | Text generation                                                                  |
+| Encoder-decoder | BART, T5, Marian, mBART                    | Summarization, translation, generative question answering                        |

From b2e223d9804f3438e98c2755abc57120abf45f3c Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 19:32:42 +0100
Subject: [PATCH 011/127] Delete test

---
 chapters/it/chapter1/test | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 chapters/it/chapter1/test

diff --git a/chapters/it/chapter1/test b/chapters/it/chapter1/test
deleted file mode 100644
index 8b1378917..000000000
--- a/chapters/it/chapter1/test
+++ /dev/null
@@ -1 +0,0 @@
-

From 25046587d66ce84936f252278085161643e6d65e Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 19:33:04 +0100
Subject: [PATCH 012/127] Add files via upload

---
 chapters/it/_toctree.yml | 173 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 173 insertions(+)
 create mode 100644 chapters/it/_toctree.yml

diff --git a/chapters/it/_toctree.yml b/chapters/it/_toctree.yml
new file mode 100644
index 000000000..d664b9a47
--- /dev/null
+++ b/chapters/it/_toctree.yml
@@ -0,0 +1,173 @@
+- title: 0. Installazione
+  sections:
+  - local: chapter0/1
+    title: Introduzione
+
+- title: 1. Modelli Transformer
+  sections:
+  - local: chapter1/1
+    title: Introduzione
+  - local: chapter1/2
+    title: Trattamento Automatico del Linguaggio
+  - local: chapter1/3
+    title: Cosa fanno i Transformers?
+  - local: chapter1/4
+    title: Come funzionano i Transformers?
+  - local: chapter1/5
+    title: Modelli Encoder
+  - local: chapter1/6
+    title: Modelli Decoder
+  - local: chapter1/7
+    title: Modelli Sequence-to-sequence
+  - local: chapter1/8
+    title: Bias e limiti
+  - local: chapter1/9
+    title: Riassunto
+  - local: chapter1/10
+    title: Quiz di fine capitolo
+    quiz: 1
+
+- title: 2. Utilizzo dei Transformers di 🤗
+  sections:
+  - local: chapter2/1
+    title: Introduzione
+  - local: chapter2/2
+    title: Dietro la pipeline
+  - local: chapter2/3
+    title: Modelli
+  - local: chapter2/4
+    title: Tokenizzatori
+  - local: chapter2/5
+    title: Gestione di sequenze multiple
+  - local: chapter2/6
+    title: Mettere tutto insieme
+  - local: chapter2/7
+    title: Usi di base completati!
+  - local: chapter2/8
+    title: Quiz di fine capitolo
+    quiz: 2
+
+- title: 3. Fine-tuning a pretrained model
+  sections:
+  - local: chapter3/1
+    title: Introduction
+  - local: chapter3/2
+    title: Processing the data
+  - local: chapter3/3
+    title: Fine-tuning a model with the Trainer API or Keras
+    local_fw: { pt: chapter3/3, tf: chapter3/3_tf }
+  - local: chapter3/4
+    title: A full training
+  - local: chapter3/5
+    title: Fine-tuning, Check!
+  - local: chapter3/6
+    title: End-of-chapter quiz
+    quiz: 3
+
+- title: 4. Sharing models and tokenizers
+  sections:
+  - local: chapter4/1
+    title: The Hugging Face Hub
+  - local: chapter4/2
+    title: Using pretrained models
+  - local: chapter4/3
+    title: Sharing pretrained models
+  - local: chapter4/4
+    title: Building a model card
+  - local: chapter4/5
+    title: Part 1 completed!
+  - local: chapter4/6
+    title: End-of-chapter quiz
+    quiz: 4
+
+- title: 5. The 🤗 Datasets library
+  sections:
+  - local: chapter5/1
+    title: Introduction
+  - local: chapter5/2
+    title: What if my dataset isn't on the Hub?
+  - local: chapter5/3
+    title: Time to slice and dice
+  - local: chapter5/4
+    title: Big data? 🤗 Datasets to the rescue!
+  - local: chapter5/5
+    title: Creating your own dataset
+  - local: chapter5/6
+    title: Semantic search with FAISS
+  - local: chapter5/7
+    title: 🤗 Datasets, check!
+  - local: chapter5/8
+    title: End-of-chapter quiz
+    quiz: 5
+
+- title: 6. The 🤗 Tokenizers library
+  sections:
+  - local: chapter6/1
+    title: Introduction
+  - local: chapter6/2
+    title: Training a new tokenizer from an old one
+  - local: chapter6/3
+    title: Fast tokenizers' special powers
+  - local: chapter6/3b
+    title: Fast tokenizers in the QA pipeline
+  - local: chapter6/4
+    title: Normalization and pre-tokenization
+  - local: chapter6/5
+    title: Byte-Pair Encoding tokenization
+  - local: chapter6/6
+    title: WordPiece tokenization
+  - local: chapter6/7
+    title: Unigram tokenization
+  - local: chapter6/8
+    title: Building a tokenizer, block by block
+  - local: chapter6/9
+    title: Tokenizers, check!
+  - local: chapter6/10
+    title: End-of-chapter quiz
+    quiz: 6
+
+- title: 7. Main NLP tasks
+  sections:
+  - local: chapter7/1
+    title: Introduction
+  - local: chapter7/2
+    title: Token classification
+  - local: chapter7/3
+    title: Fine-tuning a masked language model
+  - local: chapter7/4
+    title: Translation
+  - local: chapter7/5
+    title: Summarization
+  - local: chapter7/6
+    title: Training a causal language model from scratch
+  - local: chapter7/7
+    title: Question answering
+  - local: chapter7/8
+    title: Mastering NLP
+  - local: chapter7/9
+    title: End-of-chapter quiz
+    quiz: 7
+
+- title: 8. How to ask for help
+  sections:
+  - local: chapter8/1
+    title: Introduction
+  - local: chapter8/2
+    title: What to do when you get an error
+  - local: chapter8/3
+    title: Asking for help on the forums
+  - local: chapter8/4
+    title: Debugging the training pipeline
+    local_fw: { pt: chapter8/4, tf: chapter8/4_tf }
+  - local: chapter8/5
+    title: How to write a good issue
+  - local: chapter8/6
+    title: Part 2 completed!
+  - local: chapter8/7
+    title: End-of-chapter quiz
+    quiz: 8
+
+- title: Hugging Face Course Event
+  sections:
+  - local: event/1
+    title: Part 2 Release Event

From 8fe401cc409b50a108cd07368a1debff52775f3e Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 19:33:37 +0100
Subject: [PATCH 013/127] Add files via upload

---
 chapters/it/chapter2/1.mdx |  20 +++
 chapters/it/chapter2/2.mdx | 353 +++++++++++++++++++++++++++++++++++++
 chapters/it/chapter2/3.mdx | 228 ++++++++++++++++++++++++
 chapters/it/chapter2/4.mdx | 240 +++++++++++++++++++++++++
 chapters/it/chapter2/5.mdx | 338 +++++++++++++++++++++++++++++++++++
 chapters/it/chapter2/6.mdx | 164 +++++++++++++++++
 chapters/it/chapter2/7.mdx |  13 ++
 chapters/it/chapter2/8.mdx | 305 ++++++++++++++++++++++++++++++++
 8 files changed, 1661 insertions(+)
 create mode 100644 chapters/it/chapter2/1.mdx
 create mode 100644 chapters/it/chapter2/2.mdx
 create mode 100644 chapters/it/chapter2/3.mdx
 create mode 100644 chapters/it/chapter2/4.mdx
 create mode 100644 chapters/it/chapter2/5.mdx
 create mode 100644 chapters/it/chapter2/6.mdx
 create mode 100644 chapters/it/chapter2/7.mdx
 create mode 100644 chapters/it/chapter2/8.mdx

diff --git a/chapters/it/chapter2/1.mdx b/chapters/it/chapter2/1.mdx
new file mode 100644
index 000000000..9ab184b82
--- /dev/null
+++ b/chapters/it/chapter2/1.mdx
@@ -0,0 +1,20 @@
+# Introduction
+
+As you saw in [Chapter 1](/course/chapter1), Transformer models are usually very large. With millions to tens of *billions* of parameters, training and deploying these models is a complicated undertaking. Furthermore, with new models being released on a near-daily basis and each having its own implementation, trying them all out is no easy task.
+
+The 🤗 Transformers library was created to solve this problem. Its goal is to provide a single API through which any Transformer model can be loaded, trained, and saved. The library's main features are:
+
+- **Ease of use**: Downloading, loading, and using a state-of-the-art NLP model for inference can be done in just two lines of code.
+- **Flexibility**: At their core, all models are simple PyTorch `nn.Module` or TensorFlow `tf.keras.Model` classes and can be handled like any other models in their respective machine learning (ML) frameworks.
+- **Simplicity**: Hardly any abstractions are made across the library. "All in one file" is a core concept: a model's forward pass is entirely defined in a single file, so that the code itself is understandable and hackable.
+
+This last feature makes 🤗 Transformers quite different from other ML libraries. The models are not built on modules 
+that are shared across files; instead, each model has its own layers. In addition to making the models more approachable and understandable, this allows you to easily experiment on one model without affecting others.
+
+This chapter will begin with an end-to-end example where we use a model and a tokenizer together to replicate the `pipeline()` function introduced in [Chapter 1](/course/chapter1). Next, we'll discuss the model API: we'll dive into the model and configuration classes, and show you how to load a model and how it processes numerical inputs to output predictions. 
+
+Then we'll look at the tokenizer API, which is the other main component of the `pipeline()` function. Tokenizers take care of the first and last processing steps, handling the conversion from text to numerical inputs for the neural network, and the conversion back to text when it is needed. Finally, we'll show you how to handle sending multiple sentences through a model in a prepared batch, then wrap it all up with a closer look at the high-level `tokenizer()` function.
+
+<Tip>
+⚠️ In order to benefit from all features available with the Model Hub and 🤗 Transformers, we recommend <a href="https://huggingface.co/join">creating an account</a>.
+</Tip>
\ No newline at end of file
diff --git a/chapters/it/chapter2/2.mdx b/chapters/it/chapter2/2.mdx
new file mode 100644
index 000000000..a7715efc7
--- /dev/null
+++ b/chapters/it/chapter2/2.mdx
@@ -0,0 +1,353 @@
+<FrameworkSwitchCourse {fw} />
+
+# Behind the pipeline
+
+{#if fw === 'pt'}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section2_pt.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section2_pt.ipynb"},
+]} />
+
+{:else}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section2_tf.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section2_tf.ipynb"},
+]} />
+
+{/if}
+
+<Tip>
+This is the first section where the content is slightly different depending on whether you use PyTorch or TensorFlow. Toggle the switch on top of the title to select the platform you prefer!
+</Tip>
+
+{#if fw === 'pt'}
+<Youtube id="1pedAIvTWXk"/>
+{:else}
+<Youtube id="wVN12smEvqg"/>
+{/if}
+
+Let's start with a complete example, taking a look at what happened behind the scenes when we executed the following code in [Chapter 1](/course/chapter1):
+
+```python
+from transformers import pipeline
+
+classifier = pipeline("sentiment-analysis")
+classifier(
+    [
+        "I've been waiting for a HuggingFace course my whole life.",
+        "I hate this so much!",
+    ]
+)
+```
+
+and obtained:
+
+```python out
+[{'label': 'POSITIVE', 'score': 0.9598047137260437},
+ {'label': 'NEGATIVE', 'score': 0.9994558095932007}]
+```
+
+As we saw in [Chapter 1](/course/chapter1), this pipeline groups together three steps: preprocessing, passing the inputs through the model, and postprocessing:
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/full_nlp_pipeline.svg" alt="The full NLP pipeline: tokenization of text, conversion to IDs, and inference through the Transformer model and the model head."/>
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/full_nlp_pipeline-dark.svg" alt="The full NLP pipeline: tokenization of text, conversion to IDs, and inference through the Transformer model and the model head."/>
+</div>
+
+Let's quickly go over each of these.
+
+## Preprocessing with a tokenizer
+
+Like other neural networks, Transformer models can't process raw text directly, so the first step of our pipeline is to convert the text inputs into numbers that the model can make sense of. To do this we use a *tokenizer*, which will be responsible for:
+
+- Splitting the input into words, subwords, or symbols (like punctuation) that are called *tokens*
+- Mapping each token to an integer
+- Adding additional inputs that may be useful to the model
+
+All this preprocessing needs to be done in exactly the same way as when the model was pretrained, so we first need to download that information from the [Model Hub](https://huggingface.co/models). To do this, we use the `AutoTokenizer` class and its `from_pretrained()` method. Using the checkpoint name of our model, it will automatically fetch the data associated with the model's tokenizer and cache it (so it's only downloaded the first time you run the code below).
+
+Since the default checkpoint of the `sentiment-analysis` pipeline is `distilbert-base-uncased-finetuned-sst-2-english` (you can see its model card [here](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)), we run the following:
+
+```python
+from transformers import AutoTokenizer
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+```
+
+Once we have the tokenizer, we can directly pass our sentences to it and we'll get back a dictionary that's ready to feed to our model! The only thing left to do is to convert the list of input IDs to tensors.
+
+You can use 🤗 Transformers without having to worry about which ML framework is used as a backend; it might be PyTorch or TensorFlow, or Flax for some models. However, Transformer models only accept *tensors* as input. If this is your first time hearing about tensors, you can think of them as NumPy arrays instead. A NumPy array can be a scalar (0D), a vector (1D), a matrix (2D), or have more dimensions. It's effectively a tensor; other ML frameworks' tensors behave similarly, and are usually as simple to instantiate as NumPy arrays.
+
+To specify the type of tensors we want to get back (PyTorch, TensorFlow, or plain NumPy), we use the `return_tensors` argument:
+
+{#if fw === 'pt'}
+```python
+raw_inputs = [
+    "I've been waiting for a HuggingFace course my whole life.",
+    "I hate this so much!",
+]
+inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="pt")
+print(inputs)
+```
+{:else}
+```python
+raw_inputs = [
+    "I've been waiting for a HuggingFace course my whole life.",
+    "I hate this so much!",
+]
+inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="tf")
+print(inputs)
+```
+{/if}
+
+Don't worry about padding and truncation just yet; we'll explain those later. The main things to remember here are that you can pass one sentence or a list of sentences, as well as specifying the type of tensors you want to get back (if no type is passed, you will get a list of lists as a result).
+
+{#if fw === 'pt'}
+
+Here's what the results look like as PyTorch tensors:
+
+```python out
+{
+    'input_ids': tensor([
+        [  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172, 2607,  2026,  2878,  2166,  1012,   102],
+        [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,     0,     0,     0,     0,     0,     0]
+    ]), 
+    'attention_mask': tensor([
+        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+        [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+    ])
+}
+```
+{:else}
+
+Here's what the results look like as TensorFlow tensors:
+
+```python out
+{
+    'input_ids': <tf.Tensor: shape=(2, 16), dtype=int32, numpy=
+        array([
+            [  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,  2607,  2026,  2878,  2166,  1012,   102],
+            [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,     0,     0,     0,     0,     0,     0]
+        ], dtype=int32)>, 
+    'attention_mask': <tf.Tensor: shape=(2, 16), dtype=int32, numpy=
+        array([
+            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
+        ], dtype=int32)>
+}
+```
+{/if}
+
+The output itself is a dictionary containing two keys, `input_ids` and `attention_mask`. `input_ids` contains two rows of integers (one for each sentence) that are the unique identifiers of the tokens in each sentence. We'll explain what the `attention_mask` is later in this chapter. 
+
+## Going through the model
+
+{#if fw === 'pt'}
+We can download our pretrained model the same way we did with our tokenizer. 🤗 Transformers provides an `AutoModel` class which also has a `from_pretrained()` method:
+
+```python
+from transformers import AutoModel
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+model = AutoModel.from_pretrained(checkpoint)
+```
+{:else}
+We can download our pretrained model the same way we did with our tokenizer. 🤗 Transformers provides a `TFAutoModel` class which also has a `from_pretrained()` method:
+
+```python
+from transformers import TFAutoModel
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+model = TFAutoModel.from_pretrained(checkpoint)
+```
+{/if}
+
+In this code snippet, we have downloaded the same checkpoint we used in our pipeline before (it should actually have been cached already) and instantiated a model with it.
+
+This architecture contains only the base Transformer module: given some inputs, it outputs what we'll call *hidden states*, also known as *features*. For each model input, we'll retrieve a high-dimensional vector representing the **contextual understanding of that input by the Transformer model**.
+
+If this doesn't make sense, don't worry about it. We'll explain it all later.
+
+While these hidden states can be useful on their own, they're usually inputs to another part of the model, known as the *head*. In [Chapter 1](/course/chapter1), the different tasks could have been performed with the same architecture, but each of these tasks will have a different head associated with it.
+
+### A high-dimensional vector?
+
+The vector output by the Transformer module is usually large. It generally has three dimensions:
+
+- **Batch size**: The number of sequences processed at a time (2 in our example).
+- **Sequence length**: The length of the numerical representation of the sequence (16 in our example).
+- **Hidden size**: The vector dimension of each model input.
+
+It is said to be "high dimensional" because of the last value. The hidden size can be very large (768 is common for smaller models, and in larger models this can reach 3072 or more).
+
+We can see this if we feed the inputs we preprocessed to our model:
+
+{#if fw === 'pt'}
+```python
+outputs = model(**inputs)
+print(outputs.last_hidden_state.shape)
+```
+
+```python out
+torch.Size([2, 16, 768])
+```
+{:else}
+```py
+outputs = model(inputs)
+print(outputs.last_hidden_state.shape)
+```
+
+```python out
+(2, 16, 768)
+```
+{/if}
+
+Note that the outputs of 🤗 Transformers models behave like `namedtuple`s or dictionaries. You can access the elements by attributes (like we did) or by key (`outputs["last_hidden_state"]`), or even by index if you know exactly where the thing you are looking for is (`outputs[0]`).
+
+### Model heads: Making sense out of numbers
+
+The model heads take the high-dimensional vector of hidden states as input and project them onto a different dimension. They are usually composed of one or a few linear layers:
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/transformer_and_head.svg" alt="A Transformer network alongside its head."/>
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/transformer_and_head-dark.svg" alt="A Transformer network alongside its head."/>
+</div>
+
+The output of the Transformer model is sent directly to the model head to be processed.
+
+In this diagram, the model is represented by its embeddings layer and the subsequent layers. The embeddings layer converts each input ID in the tokenized input into a vector that represents the associated token. The subsequent layers manipulate those vectors using the attention mechanism to produce the final representation of the sentences.
+
+There are many different architectures available in 🤗 Transformers, with each one designed around tackling a specific task. Here is a non-exhaustive list:
+
+- `*Model` (retrieve the hidden states)
+- `*ForCausalLM`
+- `*ForMaskedLM`
+- `*ForMultipleChoice`
+- `*ForQuestionAnswering`
+- `*ForSequenceClassification`
+- `*ForTokenClassification`
+- and others 🤗
+
+{#if fw === 'pt'}
+For our example, we will need a model with a sequence classification head (to be able to classify the sentences as positive or negative). So, we won't actually use the `AutoModel` class, but `AutoModelForSequenceClassification`:
+
+```python
+from transformers import AutoModelForSequenceClassification
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
+outputs = model(**inputs)
+```
+{:else}
+For our example, we will need a model with a sequence classification head (to be able to classify the sentences as positive or negative). So, we won't actually use the `TFAutoModel` class, but `TFAutoModelForSequenceClassification`:
+
+```python
+from transformers import TFAutoModelForSequenceClassification
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
+outputs = model(inputs)
+```
+{/if}
+
+Now if we look at the shape of our outputs, the dimensionality will be much lower: the model head takes as input the high-dimensional vectors we saw before, and outputs vectors containing two values (one per label):
+
+```python
+print(outputs.logits.shape)
+```
+
+{#if fw === 'pt'}
+```python out
+torch.Size([2, 2])
+```
+{:else}
+```python out
+(2, 2)
+```
+{/if}
+
+Since we have just two sentences and two labels, the result we get from our model is of shape 2 x 2.
+
+## Postprocessing the output
+
+The values we get as output from our model don't necessarily make sense by themselves. Let's take a look:
+
+```python
+print(outputs.logits)
+```
+
+{#if fw === 'pt'}
+```python out
+tensor([[-1.5607,  1.6123],
+        [ 4.1692, -3.3464]], grad_fn=<AddmmBackward>)
+```
+{:else}
+```python out
+<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
+    array([[-1.5606991,  1.6122842],
+           [ 4.169231 , -3.3464472]], dtype=float32)>
+```
+{/if}
+
+Our model predicted `[-1.5607, 1.6123]` for the first sentence and `[ 4.1692, -3.3464]` for the second one. Those are not probabilities but *logits*, the raw, unnormalized scores outputted by the last layer of the model. To be converted to probabilities, they need to go through a [SoftMax](https://en.wikipedia.org/wiki/Softmax_function) layer (all 🤗 Transformers models output the logits, as the loss function for training will generally fuse the last activation function, such as SoftMax, with the actual loss function, such as cross entropy):
+
+{#if fw === 'pt'}
+```py
+import torch
+
+predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+print(predictions)
+```
+{:else}
+```py
+import tensorflow as tf
+
+predictions = tf.math.softmax(outputs.logits, axis=-1)
+print(predictions)
+```
+{/if}
+
+{#if fw === 'pt'}
+```python out
+tensor([[4.0195e-02, 9.5980e-01],
+        [9.9946e-01, 5.4418e-04]], grad_fn=<SoftmaxBackward>)
+```
+{:else}
+```python out
+tf.Tensor(
+[[4.01951671e-02 9.59804833e-01]
+ [9.9945587e-01 5.4418424e-04]], shape=(2, 2), dtype=float32)
+```
+{/if}
+
+Now we can see that the model predicted `[0.0402, 0.9598]` for the first sentence and `[0.9995,  0.0005]` for the second one. These are recognizable probability scores.
+
+To get the labels corresponding to each position, we can inspect the `id2label` attribute of the model config (more on this in the next section):
+
+```python
+model.config.id2label
+```
+
+```python out
+{0: 'NEGATIVE', 1: 'POSITIVE'}
+```
+
+Now we can conclude that the model predicted the following:
+ 
+- First sentence: NEGATIVE: 0.0402, POSITIVE: 0.9598
+- Second sentence: NEGATIVE: 0.9995, POSITIVE: 0.0005
+
+We have successfully reproduced the three steps of the pipeline: preprocessing with tokenizers, passing the inputs through the model, and postprocessing! Now let's take some time to dive deeper into each of those steps.
+
+<Tip>
+
+✏️ **Try it out!** Choose two (or more) texts of your own and run them through the `sentiment-analysis` pipeline. Then replicate the steps you saw here yourself and check that you obtain the same results!
+
+</Tip>
diff --git a/chapters/it/chapter2/3.mdx b/chapters/it/chapter2/3.mdx
new file mode 100644
index 000000000..c9100c42c
--- /dev/null
+++ b/chapters/it/chapter2/3.mdx
@@ -0,0 +1,228 @@
+<FrameworkSwitchCourse {fw} />
+
+# Models
+
+{#if fw === 'pt'}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section3_pt.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section3_pt.ipynb"},
+]} />
+
+{:else}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section3_tf.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section3_tf.ipynb"},
+]} />
+
+{/if}
+
+{#if fw === 'pt'}
+<Youtube id="AhChOFRegn4"/>
+{:else}
+<Youtube id="d3JVgghSOew"/>
+{/if}
+
+{#if fw === 'pt'}
+In this section we'll take a closer look at creating and using a model. We'll use the `AutoModel` class, which is handy when you want to instantiate any model from a checkpoint.
+
+The `AutoModel` class and all of its relatives are actually simple wrappers over the wide variety of models available in the library. It's a clever wrapper as it can automatically guess the appropriate model architecture for your checkpoint, and then instantiates a model with this architecture.
+
+{:else}
+In this section we'll take a closer look at creating and using a model. We'll use the `TFAutoModel` class, which is handy when you want to instantiate any model from a checkpoint.
+
+The `TFAutoModel` class and all of its relatives are actually simple wrappers over the wide variety of models available in the library. It's a clever wrapper as it can automatically guess the appropriate model architecture for your checkpoint, and then instantiates a model with this architecture.
+
+{/if}
+
+However, if you know the type of model you want to use, you can use the class that defines its architecture directly. Let's take a look at how this works with a BERT model.
+
+## Creating a Transformer
+
+The first thing we'll need to do to initialize a BERT model is load a configuration object:
+
+{#if fw === 'pt'}
+```py
+from transformers import BertConfig, BertModel
+
+# Building the config
+config = BertConfig()
+
+# Building the model from the config
+model = BertModel(config)
+```
+{:else}
+```py
+from transformers import BertConfig, TFBertModel
+
+# Building the config
+config = BertConfig()
+
+# Building the model from the config
+model = TFBertModel(config)
+```
+{/if}
+
+The configuration contains many attributes that are used to build the model:
+
+```py
+print(config)
+```
+
+```python out
+BertConfig {
+  [...]
+  "hidden_size": 768,
+  "intermediate_size": 3072,
+  "max_position_embeddings": 512,
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  [...]
+}
+```
+
+While you haven't seen what all of these attributes do yet, you should recognize some of them: the `hidden_size` attribute defines the size of the `hidden_states` vector, and `num_hidden_layers` defines the number of layers the Transformer model has.
+
+### Different loading methods
+
+Creating a model from the default configuration initializes it with random values:
+
+{#if fw === 'pt'}
+```py
+from transformers import BertConfig, BertModel
+
+config = BertConfig()
+model = BertModel(config)
+
+# Model is randomly initialized!
+```
+{:else}
+```py
+from transformers import BertConfig, TFBertModel
+
+config = BertConfig()
+model = TFBertModel(config)
+
+# Model is randomly initialized!
+```
+{/if}
+
+The model can be used in this state, but it will output gibberish; it needs to be trained first. We could train the model from scratch on the task at hand, but as you saw in [Chapter 1](/course/chapter1), this would require a long time and a lot of data, and it would have a non-negligible environmental impact. To avoid unnecessary and duplicated effort, it's imperative to be able to share and reuse models that have already been trained.
+
+Loading a Transformer model that is already trained is simple — we can do this using the `from_pretrained()` method:
+
+{#if fw === 'pt'}
+```py
+from transformers import BertModel
+
+model = BertModel.from_pretrained("bert-base-cased")
+```
+
+As you saw earlier, we could replace `BertModel` with the equivalent `AutoModel` class. We'll do this from now on as this produces checkpoint-agnostic code; if your code works for one checkpoint, it should work seamlessly with another. This applies even if the architecture is different, as long as the checkpoint was trained for a similar task (for example, a sentiment analysis task).
+
+{:else}
+```py
+from transformers import TFBertModel
+
+model = TFBertModel.from_pretrained("bert-base-cased")
+```
+
+As you saw earlier, we could replace `TFBertModel` with the equivalent `TFAutoModel` class. We'll do this from now on as this produces checkpoint-agnostic code; if your code works for one checkpoint, it should work seamlessly with another. This applies even if the architecture is different, as long as the checkpoint was trained for a similar task (for example, a sentiment analysis task).
+
+{/if}
+
+In the code sample above we didn't use `BertConfig`, and instead loaded a pretrained model via the `bert-base-cased` identifier. This is a model checkpoint that was trained by the authors of BERT themselves; you can find more details about it in its [model card](https://huggingface.co/bert-base-cased).
+
+This model is now initialized with all the weights of the checkpoint. It can be used directly for inference on the tasks it was trained on, and it can also be fine-tuned on a new task. By training with pretrained weights rather than from scratch, we can quickly achieve good results.
+
+The weights have been downloaded and cached (so future calls to the `from_pretrained()` method won't re-download them) in the cache folder, which defaults to *~/.cache/huggingface/transformers*. You can customize your cache folder by setting the `HF_HOME` environment variable.
+
+The identifier used to load the model can be the identifier of any model on the Model Hub, as long as it is compatible with the BERT architecture. The entire list of available BERT checkpoints can be found [here](https://huggingface.co/models?filter=bert).
+
+### Saving methods
+
+Saving a model is as easy as loading one — we use the `save_pretrained()` method, which is analogous to the `from_pretrained()` method:
+
+```py
+model.save_pretrained("directory_on_my_computer")
+```
+
+This saves two files to your disk:
+
+{#if fw === 'pt'}
+```
+ls directory_on_my_computer
+
+config.json pytorch_model.bin
+```
+{:else}
+```
+ls directory_on_my_computer
+
+config.json tf_model.h5
+```
+{/if}
+
+If you take a look at the *config.json* file, you'll recognize the attributes necessary to build the model architecture. This file also contains some metadata, such as where the checkpoint originated and what 🤗 Transformers version you were using when you last saved the checkpoint.
+
+{#if fw === 'pt'}
+The *pytorch_model.bin* file is known as the *state dictionary*; it contains all your model's weights. The two files go hand in hand; the configuration is necessary to know your model's architecture, while the model weights are your model's parameters.
+
+{:else}
+The *tf_model.h5* file is known as the *state dictionary*; it contains all your model's weights. The two files go hand in hand; the configuration is necessary to know your model's architecture, while the model weights are your model's parameters.
+
+{/if}
+
+## Using a Transformer model for inference
+
+Now that you know how to load and save a model, let's try using it to make some predictions. Transformer models can only process numbers — numbers that the tokenizer generates. But before we discuss tokenizers, let's explore what inputs the model accepts.
+
+Tokenizers can take care of casting the inputs to the appropriate framework's tensors, but to help you understand what's going on, we'll take a quick look at what must be done before sending the inputs to the model.
+
+Let's say we have a couple of sequences:
+
+```py
+sequences = ["Hello!", "Cool.", "Nice!"]
+```
+
+The tokenizer converts these to vocabulary indices which are typically called *input IDs*. Each sequence is now a list of numbers! The resulting output is:
+
+```py no-format
+encoded_sequences = [
+    [101, 7592, 999, 102],
+    [101, 4658, 1012, 102],
+    [101, 3835, 999, 102],
+]
+```
+
+This is a list of encoded sequences: a list of lists. Tensors only accept rectangular shapes (think matrices). This "array" is already of rectangular shape, so converting it to a tensor is easy:
+
+{#if fw === 'pt'}
+```py
+import torch
+
+model_inputs = torch.tensor(encoded_sequences)
+```
+{:else}
+```py
+import tensorflow as tf
+
+model_inputs = tf.constant(encoded_sequences)
+```
+{/if}
+
+### Using the tensors as inputs to the model
+
+Making use of the tensors with the model is extremely simple — we just call the model with the inputs:
+
+```py
+output = model(model_inputs)
+```
+
+While the model accepts a lot of different arguments, only the input IDs are necessary. We'll explain what the other arguments do and when they are required later, 
+but first we need to take a closer look at the tokenizers that build the inputs that a Transformer model can understand.
diff --git a/chapters/it/chapter2/4.mdx b/chapters/it/chapter2/4.mdx
new file mode 100644
index 000000000..ccebe04ec
--- /dev/null
+++ b/chapters/it/chapter2/4.mdx
@@ -0,0 +1,240 @@
+<FrameworkSwitchCourse {fw} />
+
+# Tokenizers
+
+{#if fw === 'pt'}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section4_pt.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section4_pt.ipynb"},
+]} />
+
+{:else}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section4_tf.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section4_tf.ipynb"},
+]} />
+
+{/if}
+
+<Youtube id="VFp38yj8h3A"/>
+
+Tokenizers are one of the core components of the NLP pipeline. They serve one purpose: to translate text into data that can be processed by the model. Models can only process numbers, so tokenizers need to convert our text inputs to numerical data. In this section, we'll explore exactly what happens in the tokenization pipeline. 
+
+In NLP tasks, the data that is generally processed is raw text. Here's an example of such text:
+
+```
+Jim Henson was a puppeteer
+```
+
+However, models can only process numbers, so we need to find a way to convert the raw text to numbers. That's what the tokenizers do, and there are a lot of ways to go about this. The goal is to find the most meaningful representation — that is, the one that makes the most sense to the model — and, if possible, the smallest representation.
+
+Let's take a look at some examples of tokenization algorithms, and try to answer some of the questions you may have about tokenization.
+
+## Word-based
+
+<Youtube id="nhJxYji1aho"/>
+
+The first type of tokenizer that comes to mind is _word-based_. It's generally very easy to set up and use with only a few rules, and it often yields decent results. For example, in the image below, the goal is to split the raw text into words and find a numerical representation for each of them:
+
+<div class="flex justify-center">
+  <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/word_based_tokenization.svg" alt="An example of word-based tokenization."/>
+  <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/word_based_tokenization-dark.svg" alt="An example of word-based tokenization."/>
+</div>
+
+There are different ways to split the text. For example, we could use whitespace to tokenize the text into words by applying Python's `split()` function:
+
+```py
+tokenized_text = "Jim Henson was a puppeteer".split()
+print(tokenized_text)
+```
+
+```python out
+['Jim', 'Henson', 'was', 'a', 'puppeteer']
+```
+
+There are also variations of word tokenizers that have extra rules for punctuation. With this kind of tokenizer, we can end up with some pretty large "vocabularies," where a vocabulary is defined by the total number of independent tokens that we have in our corpus.
+
+Each word gets assigned an ID, starting from 0 and going up to the size of the vocabulary. The model uses these IDs to identify each word.
+
+If we want to completely cover a language with a word-based tokenizer, we'll need to have an identifier for each word in the language, which will generate a huge amount of tokens. For example, there are over 500,000 words in the English language, so to build a map from each word to an input ID we'd need to keep track of that many IDs. Furthermore, words like "dog" are represented differently from words like "dogs", and the model will initially have no way of knowing that "dog" and "dogs" are similar: it will identify the two words as unrelated. The same applies to other similar words, like "run" and "running", which the model will not see as being similar initially.
+
+Finally, we need a custom token to represent words that are not in our vocabulary. This is known as the "unknown" token, often represented as "[UNK]" or "&lt;unk&gt;". It's generally a bad sign if you see that the tokenizer is producing a lot of these tokens, as it wasn't able to retrieve a sensible representation of a word and you're losing information along the way. The goal when crafting the vocabulary is to do it in such a way that the tokenizer tokenizes as few words as possible into the unknown token.
+
+One way to reduce the amount of unknown tokens is to go one level deeper, using a _character-based_ tokenizer.
+
+## Character-based
+
+<Youtube id="ssLq_EK2jLE"/>
+
+Character-based tokenizers split the text into characters, rather than words. This has two primary benefits:
+
+- The vocabulary is much smaller.
+- There are far fewer out-of-vocabulary (unknown) tokens, since every word can be built from characters.
+
+But here too some questions arise concerning spaces and punctuation:
+
+<div class="flex justify-center">
+  <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/character_based_tokenization.svg" alt="An example of character-based tokenization."/>
+  <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/character_based_tokenization-dark.svg" alt="An example of character-based tokenization."/>
+</div>
+
+This approach isn't perfect either. Since the representation is now based on characters rather than words, one could argue that, intuitively, it's less meaningful: each character doesn't mean a lot on its own, whereas a word does. However, this again differs according to the language; in Chinese, for example, each character carries more information than a character in a Latin language.
+
+Another thing to consider is that we'll end up with a very large amount of tokens to be processed by our model: whereas a word would only be a single token with a word-based tokenizer, it can easily turn into 10 or more tokens when converted into characters.
+
+To get the best of both worlds, we can use a third technique that combines the two approaches: *subword tokenization*.
+
+## Subword tokenization
+
+<Youtube id="zHvTiHr506c"/>
+
+Subword tokenization algorithms rely on the principle that frequently used words should not be split into smaller subwords, but rare words should be decomposed into meaningful subwords.
+
+For instance, "annoyingly" might be considered a rare word and could be decomposed into "annoying" and "ly". These are both likely to appear more frequently as standalone subwords, while at the same time the meaning of "annoyingly" is kept by the composite meaning of "annoying" and "ly".
+
+Here is an example showing how a subword tokenization algorithm would tokenize the sequence "Let's do tokenization!":
+
+<div class="flex justify-center">
+  <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/bpe_subword.svg" alt="A subword tokenization algorithm."/>
+  <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/bpe_subword-dark.svg" alt="A subword tokenization algorithm."/>
+</div>
+
+These subwords end up providing a lot of semantic meaning: for instance, in the example above "tokenization" was split into "token" and "ization", two tokens that have a semantic meaning while being space-efficient (only two tokens are needed to represent a long word). This allows us to have relatively good coverage with small vocabularies, and close to no unknown tokens.
+
+This approach is especially useful in agglutinative languages such as Turkish, where you can form (almost) arbitrarily long complex words by stringing together subwords.
+
+### And more!
+
+Unsurprisingly, there are many more techniques out there. To name a few:
+
+- Byte-level BPE, as used in GPT-2
+- WordPiece, as used in BERT
+- SentencePiece or Unigram, as used in several multilingual models
+
+You should now have sufficient knowledge of how tokenizers work to get started with the API.
+
+## Loading and saving
+
+Loading and saving tokenizers is as simple as it is with models. Actually, it's based on the same two methods: `from_pretrained()` and `save_pretrained()`. These methods will load or save the algorithm used by the tokenizer (a bit like the *architecture* of the model) as well as its vocabulary (a bit like the *weights* of the model).
+
+Loading the BERT tokenizer trained with the same checkpoint as BERT is done the same way as loading the model, except we use the `BertTokenizer` class:
+
+```py
+from transformers import BertTokenizer
+
+tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
+```
+
+{#if fw === 'pt'}
+Similar to `AutoModel`, the `AutoTokenizer` class will grab the proper tokenizer class in the library based on the checkpoint name, and can be used directly with any checkpoint:
+
+{:else}
+Similar to `TFAutoModel`, the `AutoTokenizer` class will grab the proper tokenizer class in the library based on the checkpoint name, and can be used directly with any checkpoint:
+
+{/if}
+
+```py
+from transformers import AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
+```
+
+We can now use the tokenizer as shown in the previous section:
+
+```python
+tokenizer("Using a Transformer network is simple")
+```
+
+```python out
+{'input_ids': [101, 7993, 170, 11303, 1200, 2443, 1110, 3014, 102],
+ 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0],
+ 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}
+```
+
+Saving a tokenizer is identical to saving a model:
+
+```py
+tokenizer.save_pretrained("directory_on_my_computer")
+```
+
+We'll talk more about `token_type_ids` in [Chapter 3](/course/chapter3), and we'll explain the `attention_mask` key a little later. First, let's see how the `input_ids` are generated. To do this, we'll need to look at the intermediate methods of the tokenizer.
+
+## Encoding
+
+<Youtube id="Yffk5aydLzg"/>
+
+Translating text to numbers is known as _encoding_. Encoding is done in a two-step process: the tokenization, followed by the conversion to input IDs.
+
+As we've seen, the first step is to split the text into words (or parts of words, punctuation symbols, etc.), usually called *tokens*. There are multiple rules that can govern that process, which is why we need to instantiate the tokenizer using the name of the model, to make sure we use the same rules that were used when the model was pretrained.
+
+The second step is to convert those tokens into numbers, so we can build a tensor out of them and feed them to the model. To do this, the tokenizer has a *vocabulary*, which is the part we download when we instantiate it with the `from_pretrained()` method. Again, we need to use the same vocabulary used when the model was pretrained.
+
+To get a better understanding of the two steps, we'll explore them separately. Note that we will use some methods that perform parts of the tokenization pipeline separately to show you the intermediate results of those steps, but in practice, you should call the tokenizer directly on your inputs (as shown in section 2).
+
+### Tokenization
+
+The tokenization process is done by the `tokenize()` method of the tokenizer:
+
+```py
+from transformers import AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
+
+sequence = "Using a Transformer network is simple"
+tokens = tokenizer.tokenize(sequence)
+
+print(tokens)
+```
+
+The output of this method is a list of strings, or tokens:
+
+```python out
+['Using', 'a', 'transform', '##er', 'network', 'is', 'simple']
+```
+
+This tokenizer is a subword tokenizer: it splits the words until it obtains tokens that can be represented by its vocabulary. That's the case here with `transformer`, which is split into two tokens: `transform` and `##er`.
+
+### From tokens to input IDs
+
+The conversion to input IDs is handled by the `convert_tokens_to_ids()` tokenizer method:
+
+```py
+ids = tokenizer.convert_tokens_to_ids(tokens)
+
+print(ids)
+```
+
+```python out
+[7993, 170, 11303, 1200, 2443, 1110, 3014]
+```
+
+These outputs, once converted to the appropriate framework tensor, can then be used as inputs to a model as seen earlier in this chapter.
+
+<Tip>
+
+✏️ **Try it out!** Replicate the two last steps (tokenization and conversion to input IDs) on the input sentences we used in section 2 ("I've been waiting for a HuggingFace course my whole life." and "I hate this so much!"). Check that you get the same input IDs we got earlier!
+
+</Tip>
+
+## Decoding
+
+*Decoding* is going the other way around: from vocabulary indices, we want to get a string. This can be done with the `decode()` method as follows:
+
+```py
+decoded_string = tokenizer.decode([7993, 170, 11303, 1200, 2443, 1110, 3014])
+print(decoded_string)
+```
+
+```python out
+'Using a Transformer network is simple'
+```
+
+Note that the `decode` method not only converts the indices back to tokens, but also groups together the tokens that were part of the same words to produce a readable sentence. This behavior will be extremely useful when we use models that predict new text (either text generated from a prompt, or for sequence-to-sequence problems like translation or summarization).
+
+By now you should understand the atomic operations a tokenizer can handle: tokenization, conversion to IDs, and converting IDs back to a string. However, we've only scratched the surface. In the following section, we'll take our approach to its limits and take a look at how to overcome them.
diff --git a/chapters/it/chapter2/5.mdx b/chapters/it/chapter2/5.mdx
new file mode 100644
index 000000000..5a692aa19
--- /dev/null
+++ b/chapters/it/chapter2/5.mdx
@@ -0,0 +1,338 @@
+<FrameworkSwitchCourse {fw} />
+
+# Handling multiple sequences
+
+{#if fw === 'pt'}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section5_pt.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section5_pt.ipynb"},
+]} />
+
+{:else}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section5_tf.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section5_tf.ipynb"},
+]} />
+
+{/if}
+
+{#if fw === 'pt'}
+<Youtube id="M6adb1j2jPI"/>
+{:else}
+<Youtube id="ROxrFOEbsQE"/>
+{/if}
+
+In the previous section, we explored the simplest of use cases: doing inference on a single sequence of a small length. However, some questions emerge already:
+
+- How do we handle multiple sequences?
+- How do we handle multiple sequences *of different lengths*?
+- Are vocabulary indices the only inputs that allow a model to work well?
+- Is there such a thing as too long a sequence?
+
+Let's see what kinds of problems these questions pose, and how we can solve them using the 🤗 Transformers API.
+
+## Models expect a batch of inputs
+
+In the previous exercise you saw how sequences get translated into lists of numbers. Let's convert this list of numbers to a tensor and send it to the model:
+
+{#if fw === 'pt'}
+```py
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
+
+sequence = "I've been waiting for a HuggingFace course my whole life."
+
+tokens = tokenizer.tokenize(sequence)
+ids = tokenizer.convert_tokens_to_ids(tokens)
+input_ids = torch.tensor(ids)
+# This line will fail.
+model(input_ids)
+```
+
+```python out
+IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
+```
+{:else}
+```py
+import tensorflow as tf
+from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
+
+sequence = "I've been waiting for a HuggingFace course my whole life."
+
+tokens = tokenizer.tokenize(sequence)
+ids = tokenizer.convert_tokens_to_ids(tokens)
+input_ids = tf.constant(ids)
+# This line will fail.
+model(input_ids)
+```
+
+```py out
+InvalidArgumentError: Input to reshape is a tensor with 14 values, but the requested shape has 196 [Op:Reshape]
+```
+{/if}
+
+Oh no! Why did this fail? We followed the steps from the pipeline in section 2.
+
+The problem is that we sent a single sequence to the model, whereas 🤗 Transformers models expect multiple sentences by default. Here we tried to do everything the tokenizer did behind the scenes when we applied it to a `sequence`, but if you look closely, you'll see that it didn't just convert the list of input IDs into a tensor, it added a dimension on top of it:
+
+{#if fw === 'pt'}
+```py
+tokenized_inputs = tokenizer(sequence, return_tensors="pt")
+print(tokenized_inputs["input_ids"])
+```
+
+```python out
+tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
+          2607,  2026,  2878,  2166,  1012,   102]])
+```
+{:else}
+```py
+tokenized_inputs = tokenizer(sequence, return_tensors="tf")
+print(tokenized_inputs["input_ids"])
+```
+
+```py out
+<tf.Tensor: shape=(1, 16), dtype=int32, numpy=
+array([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662,
+        12172,  2607,  2026,  2878,  2166,  1012,   102]], dtype=int32)>
+```
+{/if}
+
+Let's try again and add a new dimension:
+
+{#if fw === 'pt'}
+```py
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
+
+sequence = "I've been waiting for a HuggingFace course my whole life."
+
+tokens = tokenizer.tokenize(sequence)
+ids = tokenizer.convert_tokens_to_ids(tokens)
+
+input_ids = torch.tensor([ids])
+print("Input IDs:", input_ids)
+
+output = model(input_ids)
+print("Logits:", output.logits)
+```
+{:else}
+```py
+import tensorflow as tf
+from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
+
+sequence = "I've been waiting for a HuggingFace course my whole life."
+
+tokens = tokenizer.tokenize(sequence)
+ids = tokenizer.convert_tokens_to_ids(tokens)
+
+input_ids = tf.constant([ids])
+print("Input IDs:", input_ids)
+
+output = model(input_ids)
+print("Logits:", output.logits)
+```
+{/if}
+
+We print the input IDs as well as the resulting logits — here's the output:
+
+{#if fw === 'pt'}
+```python out
+Input IDs: [[ 1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,  2607, 2026,  2878,  2166,  1012]]
+Logits: [[-2.7276,  2.8789]]
+```
+{:else}
+```py out
+Input IDs: tf.Tensor(
+[[ 1045  1005  2310  2042  3403  2005  1037 17662 12172  2607  2026  2878
+   2166  1012]], shape=(1, 14), dtype=int32)
+Logits: tf.Tensor([[-2.7276208  2.8789377]], shape=(1, 2), dtype=float32)
+```
+{/if}
+
+*Batching* is the act of sending multiple sentences through the model, all at once. If you only have one sentence, you can still build a batch, for example by repeating the same sequence:
+
+```
+batched_ids = [ids, ids]
+```
+
+This is a batch of two identical sequences!
+
+<Tip>
+
+✏️ **Try it out!** Convert this `batched_ids` list into a tensor and pass it through your model. Check that you obtain the same logits as before (but twice)!
+
+</Tip>
+
+Batching allows the model to work when you feed it multiple sentences. Using multiple sequences is just as simple as building a batch with a single sequence. There's a second issue, though. When you're trying to batch together two (or more) sentences, they might be of different lengths. If you've ever worked with tensors before, you know that they need to be of rectangular shape, so you won't be able to convert the list of input IDs into a tensor directly. To work around this problem, we usually *pad* the inputs.
+
+## Padding the inputs
+
+The following list of lists cannot be converted to a tensor:
+
+```py no-format
+batched_ids = [
+    [200, 200, 200],
+    [200, 200]
+]
+```
+
+In order to work around this, we'll use *padding* to make our tensors have a rectangular shape. Padding makes sure all our sentences have the same length by adding a special word called the *padding token* to the sentences with fewer values. For example, if you have 10 sentences with 10 words and 1 sentence with 20 words, padding will ensure all the sentences have 20 words. In our example, the resulting tensor looks like this:
+
+```py no-format
+padding_id = 100
+
+batched_ids = [
+    [200, 200, 200],
+    [200, 200, padding_id],
+]
+```
+
+The padding token ID can be found in `tokenizer.pad_token_id`. Let's use it and send our two sentences through the model individually and batched together:
+
+{#if fw === 'pt'}
+```py no-format
+model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
+
+sequence1_ids = [[200, 200, 200]]
+sequence2_ids = [[200, 200]]
+batched_ids = [
+    [200, 200, 200],
+    [200, 200, tokenizer.pad_token_id],
+]
+
+print(model(torch.tensor(sequence1_ids)).logits)
+print(model(torch.tensor(sequence2_ids)).logits)
+print(model(torch.tensor(batched_ids)).logits)
+```
+
+```python out
+tensor([[ 1.5694, -1.3895]], grad_fn=<AddmmBackward>)
+tensor([[ 0.5803, -0.4125]], grad_fn=<AddmmBackward>)
+tensor([[ 1.5694, -1.3895],
+        [ 1.3373, -1.2163]], grad_fn=<AddmmBackward>)
+```
+{:else}
+```py no-format
+model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
+
+sequence1_ids = [[200, 200, 200]]
+sequence2_ids = [[200, 200]]
+batched_ids = [
+    [200, 200, 200],
+    [200, 200, tokenizer.pad_token_id],
+]
+
+print(model(tf.constant(sequence1_ids)).logits)
+print(model(tf.constant(sequence2_ids)).logits)
+print(model(tf.constant(batched_ids)).logits)
+```
+
+```py out
+tf.Tensor([[ 1.5693678 -1.3894581]], shape=(1, 2), dtype=float32)
+tf.Tensor([[ 0.5803005  -0.41252428]], shape=(1, 2), dtype=float32)
+tf.Tensor(
+[[ 1.5693681 -1.3894582]
+ [ 1.3373486 -1.2163193]], shape=(2, 2), dtype=float32)
+```
+{/if}
+
+There's something wrong with the logits in our batched predictions: the second row should be the same as the logits for the second sentence, but we've got completely different values!
+
+This is because the key feature of Transformer models is attention layers that *contextualize* each token. These will take into account the padding tokens since they attend to all of the tokens of a sequence. To get the same result when passing individual sentences of different lengths through the model or when passing a batch with the same sentences and padding applied, we need to tell those attention layers to ignore the padding tokens. This is done by using an attention mask.
+
+## Attention masks
+
+*Attention masks* are tensors with the exact same shape as the input IDs tensor, filled with 0s and 1s: 1s indicate the corresponding tokens should be attended to, and 0s indicate the corresponding tokens should not be attended to (i.e., they should be ignored by the attention layers of the model).
+
+Let's complete the previous example with an attention mask:
+
+{#if fw === 'pt'}
+```py no-format
+batched_ids = [
+    [200, 200, 200],
+    [200, 200, tokenizer.pad_token_id],
+]
+
+attention_mask = [
+    [1, 1, 1],
+    [1, 1, 0],
+]
+
+outputs = model(torch.tensor(batched_ids), attention_mask=torch.tensor(attention_mask))
+print(outputs.logits)
+```
+
+```python out
+tensor([[ 1.5694, -1.3895],
+        [ 0.5803, -0.4125]], grad_fn=<AddmmBackward>)
+```
+{:else}
+```py no-format
+batched_ids = [
+    [200, 200, 200],
+    [200, 200, tokenizer.pad_token_id],
+]
+
+attention_mask = [
+    [1, 1, 1],
+    [1, 1, 0],
+]
+
+outputs = model(tf.constant(batched_ids), attention_mask=tf.constant(attention_mask))
+print(outputs.logits)
+```
+
+```py out
+tf.Tensor(
+[[ 1.5693681  -1.3894582 ]
+ [ 0.5803021  -0.41252586]], shape=(2, 2), dtype=float32)
+```
+{/if}
+
+Now we get the same logits for the second sentence in the batch.
+
+Notice how the last value of the second sequence is a padding ID, which is a 0 value in the attention mask.
+
+<Tip>
+
+✏️ **Try it out!** Apply the tokenization manually on the two sentences used in section 2 ("I've been waiting for a HuggingFace course my whole life." and "I hate this so much!"). Pass them through the model and check that you get the same logits as in section 2. Now batch them together using the padding token, then create the proper attention mask. Check that you obtain the same results when going through the model!
+
+</Tip>
+
+## Longer sequences
+
+With Transformer models, there is a limit to the lengths of the sequences we can pass the models. Most models handle sequences of up to 512 or 1024 tokens, and will crash when asked to process longer sequences. There are two solutions to this problem:
+
+- Use a model with a longer supported sequence length.
+- Truncate your sequences.
+
+Models have different supported sequence lengths, and some specialize in handling very long sequences. [Longformer](https://huggingface.co/transformers/model_doc/longformer.html) is one example, and another is [LED](https://huggingface.co/transformers/model_doc/led.html). If you're working on a task that requires very long sequences, we recommend you take a look at those models.
+
+Otherwise, we recommend you truncate your sequences by specifying the `max_sequence_length` parameter:
+
+```py
+sequence = sequence[:max_sequence_length]
+```
diff --git a/chapters/it/chapter2/6.mdx b/chapters/it/chapter2/6.mdx
new file mode 100644
index 000000000..974123515
--- /dev/null
+++ b/chapters/it/chapter2/6.mdx
@@ -0,0 +1,164 @@
+<FrameworkSwitchCourse {fw} />
+
+# Putting it all together
+
+{#if fw === 'pt'}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section6_pt.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section6_pt.ipynb"},
+]} />
+
+{:else}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section6_tf.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section6_tf.ipynb"},
+]} />
+
+{/if}
+
+In the last few sections, we've been trying our best to do most of the work by hand. We've explored how tokenizers work and looked at tokenization, conversion to input IDs, padding, truncation, and attention masks.
+
+However, as we saw in section 2, the 🤗 Transformers API can handle all of this for us with a high-level function that we'll dive into here. When you call your `tokenizer` directly on the sentence, you get back inputs that are ready to pass through your model:
+
+```py
+from transformers import AutoTokenizer
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+
+sequence = "I've been waiting for a HuggingFace course my whole life."
+
+model_inputs = tokenizer(sequence)
+```
+
+Here, the `model_inputs` variable contains everything that's necessary for a model to operate well. For DistilBERT, that includes the input IDs as well as the attention mask. Other models that accept additional inputs will also have those output by the `tokenizer` object.
+
+As we'll see in some examples below, this method is very powerful. First, it can tokenize a single sequence:
+
+```py
+sequence = "I've been waiting for a HuggingFace course my whole life."
+
+model_inputs = tokenizer(sequence)
+```
+
+It also handles multiple sequences at a time, with no change in the API:
+
+```py
+sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
+
+model_inputs = tokenizer(sequences)
+```
+
+It can pad according to several objectives:
+
+```py
+# Will pad the sequences up to the maximum sequence length
+model_inputs = tokenizer(sequences, padding="longest")
+
+# Will pad the sequences up to the model max length
+# (512 for BERT or DistilBERT)
+model_inputs = tokenizer(sequences, padding="max_length")
+
+# Will pad the sequences up to the specified max length
+model_inputs = tokenizer(sequences, padding="max_length", max_length=8)
+```
+
+It can also truncate sequences:
+
+```py
+sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
+
+# Will truncate the sequences that are longer than the model max length
+# (512 for BERT or DistilBERT)
+model_inputs = tokenizer(sequences, truncation=True)
+
+# Will truncate the sequences that are longer than the specified max length
+model_inputs = tokenizer(sequences, max_length=8, truncation=True)
+```
+
+The `tokenizer` object can handle the conversion to specific framework tensors, which can then be directly sent to the model. For example, in the following code sample we are prompting the tokenizer to return tensors from the different frameworks — `"pt"` returns PyTorch tensors, `"tf"` returns TensorFlow tensors, and `"np"` returns NumPy arrays:
+
+```py
+sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
+
+# Returns PyTorch tensors
+model_inputs = tokenizer(sequences, padding=True, return_tensors="pt")
+
+# Returns TensorFlow tensors
+model_inputs = tokenizer(sequences, padding=True, return_tensors="tf")
+
+# Returns NumPy arrays
+model_inputs = tokenizer(sequences, padding=True, return_tensors="np")
+```
+
+## Special tokens
+
+If we take a look at the input IDs returned by the tokenizer, we will see they are a tiny bit different from what we had earlier:
+
+```py
+sequence = "I've been waiting for a HuggingFace course my whole life."
+
+model_inputs = tokenizer(sequence)
+print(model_inputs["input_ids"])
+
+tokens = tokenizer.tokenize(sequence)
+ids = tokenizer.convert_tokens_to_ids(tokens)
+print(ids)
+```
+
+```python out
+[101, 1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012, 102]
+[1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012]
+```
+
+One token ID was added at the beginning, and one at the end. Let's decode the two sequences of IDs above to see what this is about:
+
+```py
+print(tokenizer.decode(model_inputs["input_ids"]))
+print(tokenizer.decode(ids))
+```
+
+```python out
+"[CLS] i've been waiting for a huggingface course my whole life. [SEP]"
+"i've been waiting for a huggingface course my whole life."
+```
+
+The tokenizer added the special word `[CLS]` at the beginning and the special word `[SEP]` at the end. This is because the model was pretrained with those, so to get the same results for inference we need to add them as well. Note that some models don't add special words, or add different ones; models may also add these special words only at the beginning, or only at the end. In any case, the tokenizer knows which ones are expected and will deal with this for you.
+
+## Wrapping up: From tokenizer to model
+
+Now that we've seen all the individual steps the `tokenizer` object uses when applied on texts, let's see one final time how it can handle multiple sequences (padding!), very long sequences (truncation!), and multiple types of tensors with its main API:
+
+{#if fw === 'pt'}
+```py
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
+sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
+
+tokens = tokenizer(sequences, padding=True, truncation=True, return_tensors="pt")
+output = model(**tokens)
+```
+{:else}
+```py
+import tensorflow as tf
+from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
+sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
+
+tokens = tokenizer(sequences, padding=True, truncation=True, return_tensors="tf")
+output = model(**tokens)
+```
+{/if}
diff --git a/chapters/it/chapter2/7.mdx b/chapters/it/chapter2/7.mdx
new file mode 100644
index 000000000..122728d08
--- /dev/null
+++ b/chapters/it/chapter2/7.mdx
@@ -0,0 +1,13 @@
+# Basic usage completed!
+
+Great job following the course up to here! To recap, in this chapter you:
+
+- Learned the basic building blocks of a Transformer model.
+- Learned what makes up a tokenization pipeline.
+- Saw how to use a Transformer model in practice.
+- Learned how to leverage a tokenizer to convert text to tensors that are understandable by the model.
+- Set up a tokenizer and a model together to get from text to predictions.
+- Learned the limitations of input IDs, and learned about attention masks.
+- Played around with versatile and configurable tokenizer methods.
+
+From now on, you should be able to freely navigate the 🤗 Transformers docs: the vocabulary will sound familiar, and you've already seen the methods that you'll use the majority of the time.
diff --git a/chapters/it/chapter2/8.mdx b/chapters/it/chapter2/8.mdx
new file mode 100644
index 000000000..43f0a8c9c
--- /dev/null
+++ b/chapters/it/chapter2/8.mdx
@@ -0,0 +1,305 @@
+<FrameworkSwitchCourse {fw} />
+
+<!-- DISABLE-FRONTMATTER-SECTIONS -->
+
+# End-of-chapter quiz
+
+### 1. What is the order of the language modeling pipeline?
+
+<Question
+	choices={[
+		{
+			text: "First, the model, which handles text and returns raw predictions. The tokenizer then makes sense of these predictions and converts them back to text when needed.",
+			explain: "The model cannot understand text! The tokenizer must first tokenize the text and convert it to IDs so that it is understandable by the model."
+		},
+		{
+			text: "First, the tokenizer, which handles text and returns IDs. The model handles these IDs and outputs a prediction, which can be some text.",
+			explain: "The model's prediction cannot be text straight away. The tokenizer has to be used in order to convert the prediction back to text!"
+		},
+		{
+			text: "The tokenizer handles text and returns IDs. The model handles these IDs and outputs a prediction. The tokenizer can then be used once again to convert these predictions back to some text.",
+			explain: "Correct! The tokenizer can be used for both tokenizing and de-tokenizing.",
+            correct: true
+		}
+	]}
+/>
+
+### 2. How many dimensions does the tensor output by the base Transformer model have, and what are they?
+
+<Question
+	choices={[
+		{
+			text: "2: The sequence length and the batch size",
+			explain: "False! The tensor output by the model has a third dimension: hidden size."
+		},
+		{
+			text: "2: The sequence length and the hidden size",
+			explain: "False! All Transformer models handle batches, even with a single sequence; that would be a batch size of 1!"
+		},
+		{
+			text: "3: The sequence length, the batch size, and the hidden size",
+			explain: "Correct!",
+            correct: true
+		}
+	]}
+/>
+
+### 3. Which of the following is an example of subword tokenization?
+
+<Question
+	choices={[
+		{
+			text: "WordPiece",
+			explain: "Yes, that's one example of subword tokenization!",
+            correct: true
+		},
+		{
+			text: "Character-based tokenization",
+			explain: "Character-based tokenization is not a type of subword tokenization."
+		},
+		{
+			text: "Splitting on whitespace and punctuation",
+			explain: "That's a word-based tokenization scheme!"
+		},
+		{
+			text: "BPE",
+			explain: "Yes, that's one example of subword tokenization!",
+            correct: true
+        },
+		{
+			text: "Unigram",
+			explain: "Yes, that's one example of subword tokenization!",
+            correct: true
+        },
+		{
+			text: "None of the above",
+			explain: "Incorrect!"
+        }
+	]}
+/>
+
+### 4. What is a model head?
+
+<Question
+	choices={[
+		{
+			text: "A component of the base Transformer network that redirects tensors to their correct layers",
+			explain: "Incorrect! There's no such component."
+		},
+		{
+			text: "Also known as the self-attention mechanism, it adapts the representation of a token according to the other tokens of the sequence",
+			explain: "Incorrect! The self-attention layer does contain attention \"heads,\" but these are not adaptation heads."
+		},
+		{
+			text: "An additional component, usually made up of one or a few layers, to convert the transformer predictions to a task-specific output",
+			explain: "That's right. Adaptation heads, also known simply as heads, come up in different forms: language modeling heads, question answering heads, sequence classification heads... ",
+			correct: true
+		} 
+	]}
+/>
+
+{#if fw === 'pt'}
+### 5. What is an AutoModel?
+
+<Question
+	choices={[
+		{
+			text: "A model that automatically trains on your data",
+			explain: "Incorrect. Are you mistaking this for our <a href='https://huggingface.co/autonlp'>AutoNLP</a> product?"
+		},
+		{
+			text: "An object that returns the correct architecture based on the checkpoint",
+			explain: "Exactly: the <code>AutoModel</code> only needs to know the checkpoint from which to initialize to return the correct architecture.",
+			correct: true
+		},
+		{
+			text: "A model that automatically detects the language used for its inputs to load the correct weights",
+			explain: "Incorrect; while some checkpoints and models are capable of handling multiple languages, there are no built-in tools for automatic checkpoint selection according to language. You should head over to the <a href='https://huggingface.co/models'>Model Hub</a> to find the best checkpoint for your task!"
+		} 
+	]}
+/>
+
+{:else}
+### 5. What is a TFAutoModel?
+
+<Question
+	choices={[
+		{
+			text: "A model that automatically trains on your data",
+			explain: "Incorrect. Are you mistaking this for our <a href='https://huggingface.co/autonlp'>AutoNLP</a> product?"
+		},
+		{
+			text: "An object that returns the correct architecture based on the checkpoint",
+			explain: "Exactly: the <code>TFAutoModel</code> only needs to know the checkpoint from which to initialize to return the correct architecture.",
+			correct: true
+		},
+		{
+			text: "A model that automatically detects the language used for its inputs to load the correct weights",
+			explain: "Incorrect; while some checkpoints and models are capable of handling multiple languages, there are no built-in tools for automatic checkpoint selection according to language. You should head over to the <a href='https://huggingface.co/models'>Model Hub</a> to find the best checkpoint for your task!"
+		} 
+	]}
+/>
+
+{/if}
+
+### 6. What are the techniques to be aware of when batching sequences of different lengths together?
+
+<Question
+	choices={[
+		{
+			text: "Truncating",
+			explain: "Yes, truncation is a correct way of evening out sequences so that they fit in a rectangular shape. Is it the only one, though?",
+			correct: true
+		},
+		{
+			text: "Returning tensors",
+			explain: "While the other techniques allow you to return rectangular tensors, returning tensors isn't helpful when batching sequences together."
+		},
+		{
+			text: "Padding",
+			explain: "Yes, padding is a correct way of evening out sequences so that they fit in a rectangular shape. Is it the only one, though?",
+			correct: true
+		}, 
+		{
+			text: "Attention masking",
+			explain: "Absolutely! Attention masks are of prime importance when handling sequences of different lengths. That's not the only technique to be aware of, however.",
+			correct: true
+		} 
+	]}
+/>
+
+### 7. What is the point of applying a SoftMax function to the logits output by a sequence classification model?
+
+<Question
+	choices={[
+		{
+			text: "It softens the logits so that they're more reliable.",
+			explain: "No, the SoftMax function does not affect the reliability of results."
+		},
+		{
+			text: "It applies a lower and upper bound so that they're understandable.",
+			explain: "Correct! The resulting values are bound between 0 and 1. That's not the only reason we use a SoftMax function, though.",
+            correct: true
+		},
+		{
+			text: "The total sum of the output is then 1, resulting in a possible probabilistic interpretation.",
+			explain: "Correct! That's not the only reason we use a SoftMax function, though.",
+            correct: true
+		}
+	]}
+/>
+
+### 8. What method is most of the tokenizer API centered around?
+
+<Question
+	choices={[
+		{
+			text: "<code>encode</code>, as it can encode text into IDs and IDs into predictions",
+			explain: "Wrong! While the <code>encode</code> method does exist on tokenizers, it does not exist on models."
+		},
+		{
+			text: "Calling the tokenizer object directly.",
+			explain: "Exactly! The <code>__call__</code> method of the tokenizer is a very powerful method which can handle pretty much anything. It is also the method used to retrieve predictions from a model.",
+			correct: true
+		},
+		{
+			text: "<code>pad</code>",
+			explain: "Wrong! Padding is very useful, but it's just one part of the tokenizer API."
+		},
+		{
+			text: "<code>tokenize</code>",
+			explain: "The <code>tokenize</code> method is arguably one of the most useful methods, but it isn't the core of the tokenizer API."
+		}
+	]}
+/>
+
+### 9. What does the `result` variable contain in this code sample?
+
+```py
+from transformers import AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
+result = tokenizer.tokenize("Hello!")
+```
+
+<Question
+	choices={[
+		{
+			text: "A list of strings, each string being a token",
+			explain: "Absolutely! Convert this to IDs, and send them to a model!",
+            correct: true
+		},
+		{
+			text: "A list of IDs",
+			explain: "Incorrect; that's what the <code>__call__</code> or <code>convert_tokens_to_ids</code> method is for!"
+		},
+		{
+			text: "A string containing all of the tokens",
+			explain: "This would be suboptimal, as the goal is to split the string into multiple tokens."
+		}
+	]}
+/>
+
+{#if fw === 'pt'}
+### 10. Is there something wrong with the following code?
+
+```py
+from transformers import AutoTokenizer, AutoModel
+
+tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
+model = AutoModel.from_pretrained("gpt2")
+
+encoded = tokenizer("Hey!", return_tensors="pt")
+result = model(**encoded)
+```
+
+<Question
+	choices={[
+		{
+			text: "No, it seems correct.",
+			explain: "Unfortunately, coupling a model with a tokenizer that was trained with a different checkpoint is rarely a good idea. The model was not trained to make sense out of this tokenizer's output, so the model output (if it can even run!) will not make any sense."
+		},
+		{
+			text: "The tokenizer and model should always be from the same checkpoint.",
+			explain: "Right!",
+            correct: true
+		},
+		{
+			text: "It's good practice to pad and truncate with the tokenizer as every input is a batch.",
+			explain: "It's true that every model input needs to be a batch. However, truncating or padding this sequence wouldn't necessarily make sense as there is only one of it, and those are techniques to batch together a list of sentences."
+		}
+	]}
+/>
+
+{:else}
+### 10. Is there something wrong with the following code?
+
+```py
+from transformers import AutoTokenizer, TFAutoModel
+
+tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
+model = TFAutoModel.from_pretrained("gpt2")
+
+encoded = tokenizer("Hey!", return_tensors="tf")
+result = model(**encoded)
+```
+
+<Question
+	choices={[
+		{
+			text: "No, it seems correct.",
+			explain: "Unfortunately, coupling a model with a tokenizer that was trained with a different checkpoint is rarely a good idea. The model was not trained to make sense out of this tokenizer's output, so the model output (if it can even run!) will not make any sense."
+		},
+		{
+			text: "The tokenizer and model should always be from the same checkpoint.",
+			explain: "Right!",
+            correct: true
+		},
+		{
+			text: "It's good practice to pad and truncate with the tokenizer as every input is a batch.",
+			explain: "It's true that every model input needs to be a batch. However, truncating or padding this sequence wouldn't necessarily make sense as there is only one of it, and those are techniques to batch together a list of sentences."
+		}
+	]}
+/>
+
+{/if}

From 7c5c4cebf0081858b59220c972df41e39221fd23 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 19:33:57 +0100
Subject: [PATCH 014/127] Add files via upload

---
 1.mdx |  20 ++++
 2.mdx | 353 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3.mdx | 228 +++++++++++++++++++++++++++++++++++++
 4.mdx | 240 +++++++++++++++++++++++++++++++++++++++
 5.mdx | 338 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 6.mdx | 164 +++++++++++++++++++++++++++
 7.mdx |  13 +++
 8.mdx | 305 ++++++++++++++++++++++++++++++++++++++++++++++++++
 8 files changed, 1661 insertions(+)
 create mode 100644 1.mdx
 create mode 100644 2.mdx
 create mode 100644 3.mdx
 create mode 100644 4.mdx
 create mode 100644 5.mdx
 create mode 100644 6.mdx
 create mode 100644 7.mdx
 create mode 100644 8.mdx

diff --git a/1.mdx b/1.mdx
new file mode 100644
index 000000000..9ab184b82
--- /dev/null
+++ b/1.mdx
@@ -0,0 +1,20 @@
+# Introduction
+
+As you saw in [Chapter 1](/course/chapter1), Transformer models are usually very large. With millions to tens of *billions* of parameters, training and deploying these models is a complicated undertaking. Furthermore, with new models being released on a near-daily basis and each having its own implementation, trying them all out is no easy task.
+
+The 🤗 Transformers library was created to solve this problem. Its goal is to provide a single API through which any Transformer model can be loaded, trained, and saved. The library's main features are:
+
+- **Ease of use**: Downloading, loading, and using a state-of-the-art NLP model for inference can be done in just two lines of code.
+- **Flexibility**: At their core, all models are simple PyTorch `nn.Module` or TensorFlow `tf.keras.Model` classes and can be handled like any other models in their respective machine learning (ML) frameworks.
+- **Simplicity**: Hardly any abstractions are made across the library. "All in one file" is a core concept: a model's forward pass is entirely defined in a single file, so that the code itself is understandable and hackable.
+
+This last feature makes 🤗 Transformers quite different from other ML libraries. The models are not built on modules 
+that are shared across files; instead, each model has its own layers. In addition to making the models more approachable and understandable, this allows you to easily experiment on one model without affecting others.
+
+This chapter will begin with an end-to-end example where we use a model and a tokenizer together to replicate the `pipeline()` function introduced in [Chapter 1](/course/chapter1). Next, we'll discuss the model API: we'll dive into the model and configuration classes, and show you how to load a model and how it processes numerical inputs to output predictions. 
+
+Then we'll look at the tokenizer API, which is the other main component of the `pipeline()` function. Tokenizers take care of the first and last processing steps, handling the conversion from text to numerical inputs for the neural network, and the conversion back to text when it is needed. Finally, we'll show you how to handle sending multiple sentences through a model in a prepared batch, then wrap it all up with a closer look at the high-level `tokenizer()` function.
+
+<Tip>
+⚠️ In order to benefit from all features available with the Model Hub and 🤗 Transformers, we recommend <a href="https://huggingface.co/join">creating an account</a>.
+</Tip>
\ No newline at end of file
diff --git a/2.mdx b/2.mdx
new file mode 100644
index 000000000..a7715efc7
--- /dev/null
+++ b/2.mdx
@@ -0,0 +1,353 @@
+<FrameworkSwitchCourse {fw} />
+
+# Behind the pipeline
+
+{#if fw === 'pt'}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section2_pt.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section2_pt.ipynb"},
+]} />
+
+{:else}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section2_tf.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section2_tf.ipynb"},
+]} />
+
+{/if}
+
+<Tip>
+This is the first section where the content is slightly different depending on whether you use PyTorch or TensorFlow. Toggle the switch on top of the title to select the platform you prefer!
+</Tip>
+
+{#if fw === 'pt'}
+<Youtube id="1pedAIvTWXk"/>
+{:else}
+<Youtube id="wVN12smEvqg"/>
+{/if}
+
+Let's start with a complete example, taking a look at what happened behind the scenes when we executed the following code in [Chapter 1](/course/chapter1):
+
+```python
+from transformers import pipeline
+
+classifier = pipeline("sentiment-analysis")
+classifier(
+    [
+        "I've been waiting for a HuggingFace course my whole life.",
+        "I hate this so much!",
+    ]
+)
+```
+
+and obtained:
+
+```python out
+[{'label': 'POSITIVE', 'score': 0.9598047137260437},
+ {'label': 'NEGATIVE', 'score': 0.9994558095932007}]
+```
+
+As we saw in [Chapter 1](/course/chapter1), this pipeline groups together three steps: preprocessing, passing the inputs through the model, and postprocessing:
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/full_nlp_pipeline.svg" alt="The full NLP pipeline: tokenization of text, conversion to IDs, and inference through the Transformer model and the model head."/>
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/full_nlp_pipeline-dark.svg" alt="The full NLP pipeline: tokenization of text, conversion to IDs, and inference through the Transformer model and the model head."/>
+</div>
+
+Let's quickly go over each of these.
+
+## Preprocessing with a tokenizer
+
+Like other neural networks, Transformer models can't process raw text directly, so the first step of our pipeline is to convert the text inputs into numbers that the model can make sense of. To do this we use a *tokenizer*, which will be responsible for:
+
+- Splitting the input into words, subwords, or symbols (like punctuation) that are called *tokens*
+- Mapping each token to an integer
+- Adding additional inputs that may be useful to the model
+
+All this preprocessing needs to be done in exactly the same way as when the model was pretrained, so we first need to download that information from the [Model Hub](https://huggingface.co/models). To do this, we use the `AutoTokenizer` class and its `from_pretrained()` method. Using the checkpoint name of our model, it will automatically fetch the data associated with the model's tokenizer and cache it (so it's only downloaded the first time you run the code below).
+
+Since the default checkpoint of the `sentiment-analysis` pipeline is `distilbert-base-uncased-finetuned-sst-2-english` (you can see its model card [here](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)), we run the following:
+
+```python
+from transformers import AutoTokenizer
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+```
+
+Once we have the tokenizer, we can directly pass our sentences to it and we'll get back a dictionary that's ready to feed to our model! The only thing left to do is to convert the list of input IDs to tensors.
+
+You can use 🤗 Transformers without having to worry about which ML framework is used as a backend; it might be PyTorch or TensorFlow, or Flax for some models. However, Transformer models only accept *tensors* as input. If this is your first time hearing about tensors, you can think of them as NumPy arrays instead. A NumPy array can be a scalar (0D), a vector (1D), a matrix (2D), or have more dimensions. It's effectively a tensor; other ML frameworks' tensors behave similarly, and are usually as simple to instantiate as NumPy arrays.
+
+To specify the type of tensors we want to get back (PyTorch, TensorFlow, or plain NumPy), we use the `return_tensors` argument:
+
+{#if fw === 'pt'}
+```python
+raw_inputs = [
+    "I've been waiting for a HuggingFace course my whole life.",
+    "I hate this so much!",
+]
+inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="pt")
+print(inputs)
+```
+{:else}
+```python
+raw_inputs = [
+    "I've been waiting for a HuggingFace course my whole life.",
+    "I hate this so much!",
+]
+inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="tf")
+print(inputs)
+```
+{/if}
+
+Don't worry about padding and truncation just yet; we'll explain those later. The main things to remember here are that you can pass one sentence or a list of sentences, and that you can specify the type of tensors you want to get back (if no type is passed, you will get a list of lists as a result).
+
+{#if fw === 'pt'}
+
+Here's what the results look like as PyTorch tensors:
+
+```python out
+{
+    'input_ids': tensor([
+        [  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172, 2607,  2026,  2878,  2166,  1012,   102],
+        [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,     0,     0,     0,     0,     0,     0]
+    ]), 
+    'attention_mask': tensor([
+        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
+    ])
+}
+```
+{:else}
+
+Here's what the results look like as TensorFlow tensors:
+
+```python out
+{
+    'input_ids': <tf.Tensor: shape=(2, 16), dtype=int32, numpy=
+        array([
+            [  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,  2607,  2026,  2878,  2166,  1012,   102],
+            [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,     0,     0,     0,     0,     0,     0]
+        ], dtype=int32)>, 
+    'attention_mask': <tf.Tensor: shape=(2, 16), dtype=int32, numpy=
+        array([
+            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]
+        ], dtype=int32)>
+}
+```
+{/if}
+
+The output itself is a dictionary containing two keys, `input_ids` and `attention_mask`. `input_ids` contains two rows of integers (one for each sentence) that are the unique identifiers of the tokens in each sentence. We'll explain what the `attention_mask` is later in this chapter. 
+
+## Going through the model
+
+{#if fw === 'pt'}
+We can download our pretrained model the same way we did with our tokenizer. 🤗 Transformers provides an `AutoModel` class which also has a `from_pretrained()` method:
+
+```python
+from transformers import AutoModel
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+model = AutoModel.from_pretrained(checkpoint)
+```
+{:else}
+We can download our pretrained model the same way we did with our tokenizer. 🤗 Transformers provides a `TFAutoModel` class which also has a `from_pretrained()` method:
+
+```python
+from transformers import TFAutoModel
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+model = TFAutoModel.from_pretrained(checkpoint)
+```
+{/if}
+
+In this code snippet, we have downloaded the same checkpoint we used in our pipeline before (it should actually have been cached already) and instantiated a model with it.
+
+This architecture contains only the base Transformer module: given some inputs, it outputs what we'll call *hidden states*, also known as *features*. For each model input, we'll retrieve a high-dimensional vector representing the **contextual understanding of that input by the Transformer model**.
+
+If this doesn't make sense, don't worry about it. We'll explain it all later.
+
+While these hidden states can be useful on their own, they're usually inputs to another part of the model, known as the *head*. The different tasks we saw in [Chapter 1](/course/chapter1) could all have been performed with the same architecture, but each of these tasks would have a different head associated with it.
+
+### A high-dimensional vector?
+
+The vector output by the Transformer module is usually large. It generally has three dimensions:
+
+- **Batch size**: The number of sequences processed at a time (2 in our example).
+- **Sequence length**: The length of the numerical representation of the sequence (16 in our example).
+- **Hidden size**: The vector dimension of each model input.
+
+It is said to be "high dimensional" because of the last value. The hidden size can be very large (768 is common for smaller models, and in larger models this can reach 3072 or more).
+
+We can see this if we feed the inputs we preprocessed to our model:
+
+{#if fw === 'pt'}
+```python
+outputs = model(**inputs)
+print(outputs.last_hidden_state.shape)
+```
+
+```python out
+torch.Size([2, 16, 768])
+```
+{:else}
+```py
+outputs = model(inputs)
+print(outputs.last_hidden_state.shape)
+```
+
+```python out
+(2, 16, 768)
+```
+{/if}
+
+Note that the outputs of 🤗 Transformers models behave like `namedtuple`s or dictionaries. You can access the elements by attributes (like we did) or by key (`outputs["last_hidden_state"]`), or even by index if you know exactly where the thing you are looking for is (`outputs[0]`).
+
+### Model heads: Making sense out of numbers
+
+The model heads take the high-dimensional vector of hidden states as input and project them onto a different dimension. They are usually composed of one or a few linear layers:
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/transformer_and_head.svg" alt="A Transformer network alongside its head."/>
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/transformer_and_head-dark.svg" alt="A Transformer network alongside its head."/>
+</div>
+
+The output of the Transformer model is sent directly to the model head to be processed.
+
+In this diagram, the model is represented by its embeddings layer and the subsequent layers. The embeddings layer converts each input ID in the tokenized input into a vector that represents the associated token. The subsequent layers manipulate those vectors using the attention mechanism to produce the final representation of the sentences.
+
+There are many different architectures available in 🤗 Transformers, with each one designed around tackling a specific task. Here is a non-exhaustive list:
+
+- `*Model` (retrieve the hidden states)
+- `*ForCausalLM`
+- `*ForMaskedLM`
+- `*ForMultipleChoice`
+- `*ForQuestionAnswering`
+- `*ForSequenceClassification`
+- `*ForTokenClassification`
+- and others 🤗
+
+{#if fw === 'pt'}
+For our example, we will need a model with a sequence classification head (to be able to classify the sentences as positive or negative). So, we won't actually use the `AutoModel` class, but `AutoModelForSequenceClassification`:
+
+```python
+from transformers import AutoModelForSequenceClassification
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
+outputs = model(**inputs)
+```
+{:else}
+For our example, we will need a model with a sequence classification head (to be able to classify the sentences as positive or negative). So, we won't actually use the `TFAutoModel` class, but `TFAutoModelForSequenceClassification`:
+
+```python
+from transformers import TFAutoModelForSequenceClassification
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
+outputs = model(inputs)
+```
+{/if}
+
+Now if we look at the shape of our outputs, the dimensionality will be much lower: the model head takes as input the high-dimensional vectors we saw before, and outputs vectors containing two values (one per label):
+
+```python
+print(outputs.logits.shape)
+```
+
+{#if fw === 'pt'}
+```python out
+torch.Size([2, 2])
+```
+{:else}
+```python out
+(2, 2)
+```
+{/if}
+
+Since we have just two sentences and two labels, the result we get from our model is of shape 2 x 2.
+
+## Postprocessing the output
+
+The values we get as output from our model don't necessarily make sense by themselves. Let's take a look:
+
+```python
+print(outputs.logits)
+```
+
+{#if fw === 'pt'}
+```python out
+tensor([[-1.5607,  1.6123],
+        [ 4.1692, -3.3464]], grad_fn=<AddmmBackward>)
+```
+{:else}
+```python out
+<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
+    array([[-1.5606991,  1.6122842],
+           [ 4.169231 , -3.3464472]], dtype=float32)>
+```
+{/if}
+
+Our model predicted `[-1.5607, 1.6123]` for the first sentence and `[ 4.1692, -3.3464]` for the second one. Those are not probabilities but *logits*, the raw, unnormalized scores outputted by the last layer of the model. To be converted to probabilities, they need to go through a [SoftMax](https://en.wikipedia.org/wiki/Softmax_function) layer (all 🤗 Transformers models output the logits, as the loss function for training will generally fuse the last activation function, such as SoftMax, with the actual loss function, such as cross entropy):
+
+{#if fw === 'pt'}
+```py
+import torch
+
+predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+print(predictions)
+```
+{:else}
+```py
+import tensorflow as tf
+
+predictions = tf.math.softmax(outputs.logits, axis=-1)
+print(predictions)
+```
+{/if}
+
+{#if fw === 'pt'}
+```python out
+tensor([[4.0195e-02, 9.5980e-01],
+        [9.9946e-01, 5.4418e-04]], grad_fn=<SoftmaxBackward>)
+```
+{:else}
+```python out
+tf.Tensor(
+[[4.01951671e-02 9.59804833e-01]
+ [9.9945587e-01 5.4418424e-04]], shape=(2, 2), dtype=float32)
+```
+{/if}
+
+Now we can see that the model predicted `[0.0402, 0.9598]` for the first sentence and `[0.9995, 0.0005]` for the second one. These are recognizable probability scores.
+
+To get the labels corresponding to each position, we can inspect the `id2label` attribute of the model config (more on this in the next section):
+
+```python
+model.config.id2label
+```
+
+```python out
+{0: 'NEGATIVE', 1: 'POSITIVE'}
+```
+
+Now we can conclude that the model predicted the following:
+ 
+- First sentence: NEGATIVE: 0.0402, POSITIVE: 0.9598
+- Second sentence: NEGATIVE: 0.9995, POSITIVE: 0.0005
+
+We have successfully reproduced the three steps of the pipeline: preprocessing with tokenizers, passing the inputs through the model, and postprocessing! Now let's take some time to dive deeper into each of those steps.
+
+<Tip>
+
+✏️ **Try it out!** Choose two (or more) texts of your own and run them through the `sentiment-analysis` pipeline. Then replicate the steps you saw here yourself and check that you obtain the same results!
+
+</Tip>
diff --git a/3.mdx b/3.mdx
new file mode 100644
index 000000000..c9100c42c
--- /dev/null
+++ b/3.mdx
@@ -0,0 +1,228 @@
+<FrameworkSwitchCourse {fw} />
+
+# Models
+
+{#if fw === 'pt'}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section3_pt.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section3_pt.ipynb"},
+]} />
+
+{:else}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section3_tf.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section3_tf.ipynb"},
+]} />
+
+{/if}
+
+{#if fw === 'pt'}
+<Youtube id="AhChOFRegn4"/>
+{:else}
+<Youtube id="d3JVgghSOew"/>
+{/if}
+
+{#if fw === 'pt'}
+In this section we'll take a closer look at creating and using a model. We'll use the `AutoModel` class, which is handy when you want to instantiate any model from a checkpoint.
+
+The `AutoModel` class and all of its relatives are actually simple wrappers over the wide variety of models available in the library. It's a clever wrapper as it can automatically guess the appropriate model architecture for your checkpoint, and then instantiates a model with this architecture.
+
+{:else}
+In this section we'll take a closer look at creating and using a model. We'll use the `TFAutoModel` class, which is handy when you want to instantiate any model from a checkpoint.
+
+The `TFAutoModel` class and all of its relatives are actually simple wrappers over the wide variety of models available in the library. It's a clever wrapper as it can automatically guess the appropriate model architecture for your checkpoint, and then instantiates a model with this architecture.
+
+{/if}
+
+However, if you know the type of model you want to use, you can use the class that defines its architecture directly. Let's take a look at how this works with a BERT model.
+
+## Creating a Transformer
+
+The first thing we'll need to do to initialize a BERT model is load a configuration object:
+
+{#if fw === 'pt'}
+```py
+from transformers import BertConfig, BertModel
+
+# Building the config
+config = BertConfig()
+
+# Building the model from the config
+model = BertModel(config)
+```
+{:else}
+```py
+from transformers import BertConfig, TFBertModel
+
+# Building the config
+config = BertConfig()
+
+# Building the model from the config
+model = TFBertModel(config)
+```
+{/if}
+
+The configuration contains many attributes that are used to build the model:
+
+```py
+print(config)
+```
+
+```python out
+BertConfig {
+  [...]
+  "hidden_size": 768,
+  "intermediate_size": 3072,
+  "max_position_embeddings": 512,
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  [...]
+}
+```
+
+While you haven't seen what all of these attributes do yet, you should recognize some of them: the `hidden_size` attribute defines the size of the `hidden_states` vector, and `num_hidden_layers` defines the number of layers the Transformer model has.
+
+### Different loading methods
+
+Creating a model from the default configuration initializes it with random values:
+
+{#if fw === 'pt'}
+```py
+from transformers import BertConfig, BertModel
+
+config = BertConfig()
+model = BertModel(config)
+
+# Model is randomly initialized!
+```
+{:else}
+```py
+from transformers import BertConfig, TFBertModel
+
+config = BertConfig()
+model = TFBertModel(config)
+
+# Model is randomly initialized!
+```
+{/if}
+
+The model can be used in this state, but it will output gibberish; it needs to be trained first. We could train the model from scratch on the task at hand, but as you saw in [Chapter 1](/course/chapter1), this would require a long time and a lot of data, and it would have a non-negligible environmental impact. To avoid unnecessary and duplicated effort, it's imperative to be able to share and reuse models that have already been trained.
+
+Loading a Transformer model that is already trained is simple — we can do this using the `from_pretrained()` method:
+
+{#if fw === 'pt'}
+```py
+from transformers import BertModel
+
+model = BertModel.from_pretrained("bert-base-cased")
+```
+
+As you saw earlier, we could replace `BertModel` with the equivalent `AutoModel` class. We'll do this from now on as this produces checkpoint-agnostic code; if your code works for one checkpoint, it should work seamlessly with another. This applies even if the architecture is different, as long as the checkpoint was trained for a similar task (for example, a sentiment analysis task).
+
+{:else}
+```py
+from transformers import TFBertModel
+
+model = TFBertModel.from_pretrained("bert-base-cased")
+```
+
+As you saw earlier, we could replace `TFBertModel` with the equivalent `TFAutoModel` class. We'll do this from now on as this produces checkpoint-agnostic code; if your code works for one checkpoint, it should work seamlessly with another. This applies even if the architecture is different, as long as the checkpoint was trained for a similar task (for example, a sentiment analysis task).
+
+{/if}
+
+In the code sample above we didn't use `BertConfig`, and instead loaded a pretrained model via the `bert-base-cased` identifier. This is a model checkpoint that was trained by the authors of BERT themselves; you can find more details about it in its [model card](https://huggingface.co/bert-base-cased).
+
+This model is now initialized with all the weights of the checkpoint. It can be used directly for inference on the tasks it was trained on, and it can also be fine-tuned on a new task. By training with pretrained weights rather than from scratch, we can quickly achieve good results.
+
+The weights have been downloaded and cached (so future calls to the `from_pretrained()` method won't re-download them) in the cache folder, which defaults to *~/.cache/huggingface/transformers*. You can customize your cache folder by setting the `HF_HOME` environment variable.
+
+The identifier used to load the model can be the identifier of any model on the Model Hub, as long as it is compatible with the BERT architecture. The entire list of available BERT checkpoints can be found [here](https://huggingface.co/models?filter=bert).
+
+### Saving methods
+
+Saving a model is as easy as loading one — we use the `save_pretrained()` method, which is analogous to the `from_pretrained()` method:
+
+```py
+model.save_pretrained("directory_on_my_computer")
+```
+
+This saves two files to your disk:
+
+{#if fw === 'pt'}
+```
+ls directory_on_my_computer
+
+config.json pytorch_model.bin
+```
+{:else}
+```
+ls directory_on_my_computer
+
+config.json tf_model.h5
+```
+{/if}
+
+If you take a look at the *config.json* file, you'll recognize the attributes necessary to build the model architecture. This file also contains some metadata, such as where the checkpoint originated and what 🤗 Transformers version you were using when you last saved the checkpoint.
+
+{#if fw === 'pt'}
+The *pytorch_model.bin* file is known as the *state dictionary*; it contains all your model's weights. The two files go hand in hand; the configuration is necessary to know your model's architecture, while the model weights are your model's parameters.
+
+{:else}
+The *tf_model.h5* file is known as the *state dictionary*; it contains all your model's weights. The two files go hand in hand; the configuration is necessary to know your model's architecture, while the model weights are your model's parameters.
+
+{/if}
+
+## Using a Transformer model for inference
+
+Now that you know how to load and save a model, let's try using it to make some predictions. Transformer models can only process numbers — numbers that the tokenizer generates. But before we discuss tokenizers, let's explore what inputs the model accepts.
+
+Tokenizers can take care of casting the inputs to the appropriate framework's tensors, but to help you understand what's going on, we'll take a quick look at what must be done before sending the inputs to the model.
+
+Let's say we have a couple of sequences:
+
+```py
+sequences = ["Hello!", "Cool.", "Nice!"]
+```
+
+The tokenizer converts these to vocabulary indices which are typically called *input IDs*. Each sequence is now a list of numbers! The resulting output is:
+
+```py no-format
+encoded_sequences = [
+    [101, 7592, 999, 102],
+    [101, 4658, 1012, 102],
+    [101, 3835, 999, 102],
+]
+```
+
+This is a list of encoded sequences: a list of lists. Tensors only accept rectangular shapes (think matrices). This "array" is already of rectangular shape, so converting it to a tensor is easy:
+
+{#if fw === 'pt'}
+```py
+import torch
+
+model_inputs = torch.tensor(encoded_sequences)
+```
+{:else}
+```py
+import tensorflow as tf
+
+model_inputs = tf.constant(encoded_sequences)
+```
+{/if}
+
+### Using the tensors as inputs to the model
+
+Making use of the tensors with the model is extremely simple — we just call the model with the inputs:
+
+```py
+output = model(model_inputs)
+```
+
+While the model accepts a lot of different arguments, only the input IDs are necessary. We'll explain what the other arguments do and when they are required later, 
+but first we need to take a closer look at the tokenizers that build the inputs that a Transformer model can understand.
diff --git a/4.mdx b/4.mdx
new file mode 100644
index 000000000..ccebe04ec
--- /dev/null
+++ b/4.mdx
@@ -0,0 +1,240 @@
+<FrameworkSwitchCourse {fw} />
+
+# Tokenizers
+
+{#if fw === 'pt'}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section4_pt.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section4_pt.ipynb"},
+]} />
+
+{:else}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section4_tf.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section4_tf.ipynb"},
+]} />
+
+{/if}
+
+<Youtube id="VFp38yj8h3A"/>
+
+Tokenizers are one of the core components of the NLP pipeline. They serve one purpose: to translate text into data that can be processed by the model. Models can only process numbers, so tokenizers need to convert our text inputs to numerical data. In this section, we'll explore exactly what happens in the tokenization pipeline. 
+
+In NLP tasks, the data that is generally processed is raw text. Here's an example of such text:
+
+```
+Jim Henson was a puppeteer
+```
+
+However, models can only process numbers, so we need to find a way to convert the raw text to numbers. That's what the tokenizers do, and there are a lot of ways to go about this. The goal is to find the most meaningful representation — that is, the one that makes the most sense to the model — and, if possible, the smallest representation.
+
+Let's take a look at some examples of tokenization algorithms, and try to answer some of the questions you may have about tokenization.
+
+## Word-based
+
+<Youtube id="nhJxYji1aho"/>
+
+The first type of tokenizer that comes to mind is _word-based_. It's generally very easy to set up and use with only a few rules, and it often yields decent results. For example, in the image below, the goal is to split the raw text into words and find a numerical representation for each of them:
+
+<div class="flex justify-center">
+  <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/word_based_tokenization.svg" alt="An example of word-based tokenization."/>
+  <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/word_based_tokenization-dark.svg" alt="An example of word-based tokenization."/>
+</div>
+
+There are different ways to split the text. For example, we could use whitespace to tokenize the text into words by applying Python's `split()` function:
+
+```py
+tokenized_text = "Jim Henson was a puppeteer".split()
+print(tokenized_text)
+```
+
+```python out
+['Jim', 'Henson', 'was', 'a', 'puppeteer']
+```
+
+There are also variations of word tokenizers that have extra rules for punctuation. With this kind of tokenizer, we can end up with some pretty large "vocabularies," where a vocabulary is defined by the total number of independent tokens that we have in our corpus.
+
+Each word gets assigned an ID, starting from 0 and going up to the size of the vocabulary. The model uses these IDs to identify each word.
+
+If we want to completely cover a language with a word-based tokenizer, we'll need to have an identifier for each word in the language, which will generate a huge number of tokens. For example, there are over 500,000 words in the English language, so to build a map from each word to an input ID we'd need to keep track of that many IDs. Furthermore, words like "dog" are represented differently from words like "dogs", and the model will initially have no way of knowing that "dog" and "dogs" are similar: it will identify the two words as unrelated. The same applies to other similar words, like "run" and "running", which the model will not see as being similar initially.
+
+Finally, we need a custom token to represent words that are not in our vocabulary. This is known as the "unknown" token, often represented as "[UNK]" or "&lt;unk&gt;". It's generally a bad sign if you see that the tokenizer is producing a lot of these tokens, as it wasn't able to retrieve a sensible representation of a word and you're losing information along the way. The goal when crafting the vocabulary is to do it in such a way that the tokenizer tokenizes as few words as possible into the unknown token.
+
+One way to reduce the number of unknown tokens is to go one level deeper, using a _character-based_ tokenizer.
+
+## Character-based
+
+<Youtube id="ssLq_EK2jLE"/>
+
+Character-based tokenizers split the text into characters, rather than words. This has two primary benefits:
+
+- The vocabulary is much smaller.
+- There are far fewer out-of-vocabulary (unknown) tokens, since every word can be built from characters.
+
+But here too some questions arise concerning spaces and punctuation:
+
+<div class="flex justify-center">
+  <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/character_based_tokenization.svg" alt="An example of character-based tokenization."/>
+  <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/character_based_tokenization-dark.svg" alt="An example of character-based tokenization."/>
+</div>
+
+This approach isn't perfect either. Since the representation is now based on characters rather than words, one could argue that, intuitively, it's less meaningful: each character doesn't mean a lot on its own, whereas a word does. However, this again differs according to the language; in Chinese, for example, each character carries more information than a character in a Latin language.
+
+Another thing to consider is that we'll end up with a very large number of tokens to be processed by our model: whereas a word would only be a single token with a word-based tokenizer, it can easily turn into 10 or more tokens when converted into characters.
+
+To get the best of both worlds, we can use a third technique that combines the two approaches: *subword tokenization*.
+
+## Subword tokenization
+
+<Youtube id="zHvTiHr506c"/>
+
+Subword tokenization algorithms rely on the principle that frequently used words should not be split into smaller subwords, but rare words should be decomposed into meaningful subwords.
+
+For instance, "annoyingly" might be considered a rare word and could be decomposed into "annoying" and "ly". These are both likely to appear more frequently as standalone subwords, while at the same time the meaning of "annoyingly" is kept by the composite meaning of "annoying" and "ly".
+
+Here is an example showing how a subword tokenization algorithm would tokenize the sequence "Let's do tokenization!":
+
+<div class="flex justify-center">
+  <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/bpe_subword.svg" alt="A subword tokenization algorithm."/>
+  <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/bpe_subword-dark.svg" alt="A subword tokenization algorithm."/>
+</div>
+
+These subwords end up providing a lot of semantic meaning: for instance, in the example above "tokenization" was split into "token" and "ization", two tokens that have a semantic meaning while being space-efficient (only two tokens are needed to represent a long word). This allows us to have relatively good coverage with small vocabularies, and close to no unknown tokens.
+
+This approach is especially useful in agglutinative languages such as Turkish, where you can form (almost) arbitrarily long complex words by stringing together subwords.
+
+### And more!
+
+Unsurprisingly, there are many more techniques out there. To name a few:
+
+- Byte-level BPE, as used in GPT-2
+- WordPiece, as used in BERT
+- SentencePiece or Unigram, as used in several multilingual models
+
+You should now have sufficient knowledge of how tokenizers work to get started with the API.
+
+## Loading and saving
+
+Loading and saving tokenizers is as simple as it is with models. Actually, it's based on the same two methods: `from_pretrained()` and `save_pretrained()`. These methods will load or save the algorithm used by the tokenizer (a bit like the *architecture* of the model) as well as its vocabulary (a bit like the *weights* of the model).
+
+Loading the BERT tokenizer trained with the same checkpoint as BERT is done the same way as loading the model, except we use the `BertTokenizer` class:
+
+```py
+from transformers import BertTokenizer
+
+tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
+```
+
+{#if fw === 'pt'}
+Similar to `AutoModel`, the `AutoTokenizer` class will grab the proper tokenizer class in the library based on the checkpoint name, and can be used directly with any checkpoint:
+
+{:else}
+Similar to `TFAutoModel`, the `AutoTokenizer` class will grab the proper tokenizer class in the library based on the checkpoint name, and can be used directly with any checkpoint:
+
+{/if}
+
+```py
+from transformers import AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
+```
+
+We can now use the tokenizer as shown in the previous section:
+
+```python
+tokenizer("Using a Transformer network is simple")
+```
+
+```python out
+{'input_ids': [101, 7993, 170, 11303, 1200, 2443, 1110, 3014, 102],
+ 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0],
+ 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}
+```
+
+Saving a tokenizer is identical to saving a model:
+
+```py
+tokenizer.save_pretrained("directory_on_my_computer")
+```
+
+We'll talk more about `token_type_ids` in [Chapter 3](/course/chapter3), and we'll explain the `attention_mask` key a little later. First, let's see how the `input_ids` are generated. To do this, we'll need to look at the intermediate methods of the tokenizer.
+
+## Encoding
+
+<Youtube id="Yffk5aydLzg"/>
+
+Translating text to numbers is known as _encoding_. Encoding is done in a two-step process: the tokenization, followed by the conversion to input IDs.
+
+As we've seen, the first step is to split the text into words (or parts of words, punctuation symbols, etc.), usually called *tokens*. There are multiple rules that can govern that process, which is why we need to instantiate the tokenizer using the name of the model, to make sure we use the same rules that were used when the model was pretrained.
+
+The second step is to convert those tokens into numbers, so we can build a tensor out of them and feed them to the model. To do this, the tokenizer has a *vocabulary*, which is the part we download when we instantiate it with the `from_pretrained()` method. Again, we need to use the same vocabulary used when the model was pretrained.
+
+To get a better understanding of the two steps, we'll explore them separately. Note that we will use some methods that perform parts of the tokenization pipeline separately to show you the intermediate results of those steps, but in practice, you should call the tokenizer directly on your inputs (as shown in section 2).
+
+### Tokenization
+
+The tokenization process is done by the `tokenize()` method of the tokenizer:
+
+```py
+from transformers import AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
+
+sequence = "Using a Transformer network is simple"
+tokens = tokenizer.tokenize(sequence)
+
+print(tokens)
+```
+
+The output of this method is a list of strings, or tokens:
+
+```python out
+['Using', 'a', 'transform', '##er', 'network', 'is', 'simple']
+```
+
+This tokenizer is a subword tokenizer: it splits the words until it obtains tokens that can be represented by its vocabulary. That's the case here with `transformer`, which is split into two tokens: `transform` and `##er`.
+
+### From tokens to input IDs
+
+The conversion to input IDs is handled by the `convert_tokens_to_ids()` tokenizer method:
+
+```py
+ids = tokenizer.convert_tokens_to_ids(tokens)
+
+print(ids)
+```
+
+```python out
+[7993, 170, 11303, 1200, 2443, 1110, 3014]
+```
+
+These outputs, once converted to the appropriate framework tensor, can then be used as inputs to a model as seen earlier in this chapter.
+
+<Tip>
+
+✏️ **Try it out!** Replicate the two last steps (tokenization and conversion to input IDs) on the input sentences we used in section 2 ("I've been waiting for a HuggingFace course my whole life." and "I hate this so much!"). Check that you get the same input IDs we got earlier!
+
+</Tip>
+
+## Decoding
+
+*Decoding* is going the other way around: from vocabulary indices, we want to get a string. This can be done with the `decode()` method as follows:
+
+```py
+decoded_string = tokenizer.decode([7993, 170, 11303, 1200, 2443, 1110, 3014])
+print(decoded_string)
+```
+
+```python out
+'Using a Transformer network is simple'
+```
+
+Note that the `decode` method not only converts the indices back to tokens, but also groups together the tokens that were part of the same words to produce a readable sentence. This behavior will be extremely useful when we use models that predict new text (either text generated from a prompt, or for sequence-to-sequence problems like translation or summarization).
+
+By now you should understand the atomic operations a tokenizer can handle: tokenization, conversion to IDs, and converting IDs back to a string. However, we've only seen the tip of the iceberg. In the following section, we'll take our approach to its limits and take a look at how to overcome them.
diff --git a/5.mdx b/5.mdx
new file mode 100644
index 000000000..5a692aa19
--- /dev/null
+++ b/5.mdx
@@ -0,0 +1,338 @@
+<FrameworkSwitchCourse {fw} />
+
+# Handling multiple sequences
+
+{#if fw === 'pt'}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section5_pt.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section5_pt.ipynb"},
+]} />
+
+{:else}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section5_tf.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section5_tf.ipynb"},
+]} />
+
+{/if}
+
+{#if fw === 'pt'}
+<Youtube id="M6adb1j2jPI"/>
+{:else}
+<Youtube id="ROxrFOEbsQE"/>
+{/if}
+
+In the previous section, we explored the simplest of use cases: doing inference on a single sequence of a small length. However, some questions emerge already:
+
+- How do we handle multiple sequences?
+- How do we handle multiple sequences *of different lengths*?
+- Are vocabulary indices the only inputs that allow a model to work well?
+- Is there such a thing as too long a sequence?
+
+Let's see what kinds of problems these questions pose, and how we can solve them using the 🤗 Transformers API.
+
+## Models expect a batch of inputs
+
+In the previous exercise you saw how sequences get translated into lists of numbers. Let's convert this list of numbers to a tensor and send it to the model:
+
+{#if fw === 'pt'}
+```py
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
+
+sequence = "I've been waiting for a HuggingFace course my whole life."
+
+tokens = tokenizer.tokenize(sequence)
+ids = tokenizer.convert_tokens_to_ids(tokens)
+input_ids = torch.tensor(ids)
+# This line will fail.
+model(input_ids)
+```
+
+```python out
+IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
+```
+{:else}
+```py
+import tensorflow as tf
+from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
+
+sequence = "I've been waiting for a HuggingFace course my whole life."
+
+tokens = tokenizer.tokenize(sequence)
+ids = tokenizer.convert_tokens_to_ids(tokens)
+input_ids = tf.constant(ids)
+# This line will fail.
+model(input_ids)
+```
+
+```py out
+InvalidArgumentError: Input to reshape is a tensor with 14 values, but the requested shape has 196 [Op:Reshape]
+```
+{/if}
+
+Oh no! Why did this fail? We followed the steps from the pipeline in section 2.
+
+The problem is that we sent a single sequence to the model, whereas 🤗 Transformers models expect multiple sentences by default. Here we tried to do everything the tokenizer did behind the scenes when we applied it to a `sequence`, but if you look closely, you'll see that it didn't just convert the list of input IDs into a tensor, it added a dimension on top of it:
+
+{#if fw === 'pt'}
+```py
+tokenized_inputs = tokenizer(sequence, return_tensors="pt")
+print(tokenized_inputs["input_ids"])
+```
+
+```python out
+tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
+          2607,  2026,  2878,  2166,  1012,   102]])
+```
+{:else}
+```py
+tokenized_inputs = tokenizer(sequence, return_tensors="tf")
+print(tokenized_inputs["input_ids"])
+```
+
+```py out
+<tf.Tensor: shape=(1, 16), dtype=int32, numpy=
+array([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662,
+        12172,  2607,  2026,  2878,  2166,  1012,   102]], dtype=int32)>
+```
+{/if}
+
+Let's try again and add a new dimension:
+
+{#if fw === 'pt'}
+```py
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
+
+sequence = "I've been waiting for a HuggingFace course my whole life."
+
+tokens = tokenizer.tokenize(sequence)
+ids = tokenizer.convert_tokens_to_ids(tokens)
+
+input_ids = torch.tensor([ids])
+print("Input IDs:", input_ids)
+
+output = model(input_ids)
+print("Logits:", output.logits)
+```
+{:else}
+```py
+import tensorflow as tf
+from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
+
+sequence = "I've been waiting for a HuggingFace course my whole life."
+
+tokens = tokenizer.tokenize(sequence)
+ids = tokenizer.convert_tokens_to_ids(tokens)
+
+input_ids = tf.constant([ids])
+print("Input IDs:", input_ids)
+
+output = model(input_ids)
+print("Logits:", output.logits)
+```
+{/if}
+
+We print the input IDs as well as the resulting logits — here's the output:
+
+{#if fw === 'pt'}
+```python out
+Input IDs: [[ 1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,  2607, 2026,  2878,  2166,  1012]]
+Logits: [[-2.7276,  2.8789]]
+```
+{:else}
+```py out
+Input IDs: tf.Tensor(
+[[ 1045  1005  2310  2042  3403  2005  1037 17662 12172  2607  2026  2878
+   2166  1012]], shape=(1, 14), dtype=int32)
+Logits: tf.Tensor([[-2.7276208  2.8789377]], shape=(1, 2), dtype=float32)
+```
+{/if}
+
+*Batching* is the act of sending multiple sentences through the model, all at once. If you only have one sentence, you can just build a batch with a single sequence: 
+
+```
+batched_ids = [ids, ids]
+```
+
+This is a batch of two identical sequences!
+
+<Tip>
+
+✏️ **Try it out!** Convert this `batched_ids` list into a tensor and pass it through your model. Check that you obtain the same logits as before (but twice)!
+
+</Tip>
+
+Batching allows the model to work when you feed it multiple sentences. Using multiple sequences is just as simple as building a batch with a single sequence. There's a second issue, though. When you're trying to batch together two (or more) sentences, they might be of different lengths. If you've ever worked with tensors before, you know that they need to be of rectangular shape, so you won't be able to convert the list of input IDs into a tensor directly. To work around this problem, we usually *pad* the inputs.
+
+## Padding the inputs
+
+The following list of lists cannot be converted to a tensor:
+
+```py no-format
+batched_ids = [
+    [200, 200, 200],
+    [200, 200]
+]
+```
+
+In order to work around this, we'll use *padding* to make our tensors have a rectangular shape. Padding makes sure all our sentences have the same length by adding a special word called the *padding token* to the sentences with fewer values. For example, if you have 10 sentences with 10 words and 1 sentence with 20 words, padding will ensure all the sentences have 20 words. In our example, the resulting tensor looks like this:
+
+```py no-format
+padding_id = 100
+
+batched_ids = [
+    [200, 200, 200],
+    [200, 200, padding_id],
+]
+```
+
+The padding token ID can be found in `tokenizer.pad_token_id`. Let's use it and send our two sentences through the model individually and batched together:
+
+{#if fw === 'pt'}
+```py no-format
+model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
+
+sequence1_ids = [[200, 200, 200]]
+sequence2_ids = [[200, 200]]
+batched_ids = [
+    [200, 200, 200],
+    [200, 200, tokenizer.pad_token_id],
+]
+
+print(model(torch.tensor(sequence1_ids)).logits)
+print(model(torch.tensor(sequence2_ids)).logits)
+print(model(torch.tensor(batched_ids)).logits)
+```
+
+```python out
+tensor([[ 1.5694, -1.3895]], grad_fn=<AddmmBackward>)
+tensor([[ 0.5803, -0.4125]], grad_fn=<AddmmBackward>)
+tensor([[ 1.5694, -1.3895],
+        [ 1.3373, -1.2163]], grad_fn=<AddmmBackward>)
+```
+{:else}
+```py no-format
+model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
+
+sequence1_ids = [[200, 200, 200]]
+sequence2_ids = [[200, 200]]
+batched_ids = [
+    [200, 200, 200],
+    [200, 200, tokenizer.pad_token_id],
+]
+
+print(model(tf.constant(sequence1_ids)).logits)
+print(model(tf.constant(sequence2_ids)).logits)
+print(model(tf.constant(batched_ids)).logits)
+```
+
+```py out
+tf.Tensor([[ 1.5693678 -1.3894581]], shape=(1, 2), dtype=float32)
+tf.Tensor([[ 0.5803005  -0.41252428]], shape=(1, 2), dtype=float32)
+tf.Tensor(
+[[ 1.5693681 -1.3894582]
+ [ 1.3373486 -1.2163193]], shape=(2, 2), dtype=float32)
+```
+{/if}
+
+There's something wrong with the logits in our batched predictions: the second row should be the same as the logits for the second sentence, but we've got completely different values!
+
+This is because the key feature of Transformer models is attention layers that *contextualize* each token. These will take into account the padding tokens since they attend to all of the tokens of a sequence. To get the same result when passing individual sentences of different lengths through the model or when passing a batch with the same sentences and padding applied, we need to tell those attention layers to ignore the padding tokens. This is done by using an attention mask.
+
+## Attention masks
+
+*Attention masks* are tensors with the exact same shape as the input IDs tensor, filled with 0s and 1s: 1s indicate the corresponding tokens should be attended to, and 0s indicate the corresponding tokens should not be attended to (i.e., they should be ignored by the attention layers of the model).
+
+Let's complete the previous example with an attention mask:
+
+{#if fw === 'pt'}
+```py no-format
+batched_ids = [
+    [200, 200, 200],
+    [200, 200, tokenizer.pad_token_id],
+]
+
+attention_mask = [
+    [1, 1, 1],
+    [1, 1, 0],
+]
+
+outputs = model(torch.tensor(batched_ids), attention_mask=torch.tensor(attention_mask))
+print(outputs.logits)
+```
+
+```python out
+tensor([[ 1.5694, -1.3895],
+        [ 0.5803, -0.4125]], grad_fn=<AddmmBackward>)
+```
+{:else}
+```py no-format
+batched_ids = [
+    [200, 200, 200],
+    [200, 200, tokenizer.pad_token_id],
+]
+
+attention_mask = [
+    [1, 1, 1],
+    [1, 1, 0],
+]
+
+outputs = model(tf.constant(batched_ids), attention_mask=tf.constant(attention_mask))
+print(outputs.logits)
+```
+
+```py out
+tf.Tensor(
+[[ 1.5693681  -1.3894582 ]
+ [ 0.5803021  -0.41252586]], shape=(2, 2), dtype=float32)
+```
+{/if}
+
+Now we get the same logits for the second sentence in the batch.
+
+Notice how the last value of the second sequence is a padding ID, which is a 0 value in the attention mask.
+
+<Tip>
+
+✏️ **Try it out!** Apply the tokenization manually on the two sentences used in section 2 ("I've been waiting for a HuggingFace course my whole life." and "I hate this so much!"). Pass them through the model and check that you get the same logits as in section 2. Now batch them together using the padding token, then create the proper attention mask. Check that you obtain the same results when going through the model!
+
+</Tip>
+
+## Longer sequences
+
+With Transformer models, there is a limit to the lengths of the sequences we can pass to the models. Most models handle sequences of up to 512 or 1024 tokens, and will crash when asked to process longer sequences. There are two solutions to this problem:
+
+- Use a model with a longer supported sequence length.
+- Truncate your sequences.
+
+Models have different supported sequence lengths, and some specialize in handling very long sequences. [Longformer](https://huggingface.co/transformers/model_doc/longformer.html) is one example, and another is [LED](https://huggingface.co/transformers/model_doc/led.html). If you're working on a task that requires very long sequences, we recommend you take a look at those models.
+
+Otherwise, we recommend you truncate your sequences by specifying the `max_sequence_length` parameter:
+
+```py
+sequence = sequence[:max_sequence_length]
+```
diff --git a/6.mdx b/6.mdx
new file mode 100644
index 000000000..974123515
--- /dev/null
+++ b/6.mdx
@@ -0,0 +1,164 @@
+<FrameworkSwitchCourse {fw} />
+
+# Putting it all together
+
+{#if fw === 'pt'}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section6_pt.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section6_pt.ipynb"},
+]} />
+
+{:else}
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section6_tf.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section6_tf.ipynb"},
+]} />
+
+{/if}
+
+In the last few sections, we've been trying our best to do most of the work by hand. We've explored how tokenizers work and looked at tokenization, conversion to input IDs, padding, truncation, and attention masks.
+
+However, as we saw in section 2, the 🤗 Transformers API can handle all of this for us with a high-level function that we'll dive into here. When you call your `tokenizer` directly on the sentence, you get back inputs that are ready to pass through your model:
+
+```py
+from transformers import AutoTokenizer
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+
+sequence = "I've been waiting for a HuggingFace course my whole life."
+
+model_inputs = tokenizer(sequence)
+```
+
+Here, the `model_inputs` variable contains everything that's necessary for a model to operate well. For DistilBERT, that includes the input IDs as well as the attention mask. Other models that accept additional inputs will also have those output by the `tokenizer` object.
+
+As we'll see in some examples below, this method is very powerful. First, it can tokenize a single sequence:
+
+```py
+sequence = "I've been waiting for a HuggingFace course my whole life."
+
+model_inputs = tokenizer(sequence)
+```
+
+It also handles multiple sequences at a time, with no change in the API:
+
+```py
+sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
+
+model_inputs = tokenizer(sequences)
+```
+
+It can pad according to several objectives:
+
+```py
+# Will pad the sequences up to the maximum sequence length
+model_inputs = tokenizer(sequences, padding="longest")
+
+# Will pad the sequences up to the model max length
+# (512 for BERT or DistilBERT)
+model_inputs = tokenizer(sequences, padding="max_length")
+
+# Will pad the sequences up to the specified max length
+model_inputs = tokenizer(sequences, padding="max_length", max_length=8)
+```
+
+It can also truncate sequences:
+
+```py
+sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
+
+# Will truncate the sequences that are longer than the model max length
+# (512 for BERT or DistilBERT)
+model_inputs = tokenizer(sequences, truncation=True)
+
+# Will truncate the sequences that are longer than the specified max length
+model_inputs = tokenizer(sequences, max_length=8, truncation=True)
+```
+
+The `tokenizer` object can handle the conversion to specific framework tensors, which can then be directly sent to the model. For example, in the following code sample we are prompting the tokenizer to return tensors from the different frameworks — `"pt"` returns PyTorch tensors, `"tf"` returns TensorFlow tensors, and `"np"` returns NumPy arrays:
+
+```py
+sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
+
+# Returns PyTorch tensors
+model_inputs = tokenizer(sequences, padding=True, return_tensors="pt")
+
+# Returns TensorFlow tensors
+model_inputs = tokenizer(sequences, padding=True, return_tensors="tf")
+
+# Returns NumPy arrays
+model_inputs = tokenizer(sequences, padding=True, return_tensors="np")
+```
+
+## Special tokens
+
+If we take a look at the input IDs returned by the tokenizer, we will see they are a tiny bit different from what we had earlier:
+
+```py
+sequence = "I've been waiting for a HuggingFace course my whole life."
+
+model_inputs = tokenizer(sequence)
+print(model_inputs["input_ids"])
+
+tokens = tokenizer.tokenize(sequence)
+ids = tokenizer.convert_tokens_to_ids(tokens)
+print(ids)
+```
+
+```python out
+[101, 1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012, 102]
+[1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012]
+```
+
+One token ID was added at the beginning, and one at the end. Let's decode the two sequences of IDs above to see what this is about:
+
+```py
+print(tokenizer.decode(model_inputs["input_ids"]))
+print(tokenizer.decode(ids))
+```
+
+```python out
+"[CLS] i've been waiting for a huggingface course my whole life. [SEP]"
+"i've been waiting for a huggingface course my whole life."
+```
+
+The tokenizer added the special word `[CLS]` at the beginning and the special word `[SEP]` at the end. This is because the model was pretrained with those, so to get the same results for inference we need to add them as well. Note that some models don't add special words, or add different ones; models may also add these special words only at the beginning, or only at the end. In any case, the tokenizer knows which ones are expected and will deal with this for you.
+
+## Wrapping up: From tokenizer to model
+
+Now that we've seen all the individual steps the `tokenizer` object uses when applied on texts, let's see one final time how it can handle multiple sequences (padding!), very long sequences (truncation!), and multiple types of tensors with its main API:
+
+{#if fw === 'pt'}
+```py
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
+sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
+
+tokens = tokenizer(sequences, padding=True, truncation=True, return_tensors="pt")
+output = model(**tokens)
+```
+{:else}
+```py
+import tensorflow as tf
+from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
+
+checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
+sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
+
+tokens = tokenizer(sequences, padding=True, truncation=True, return_tensors="tf")
+output = model(**tokens)
+```
+{/if}
diff --git a/7.mdx b/7.mdx
new file mode 100644
index 000000000..122728d08
--- /dev/null
+++ b/7.mdx
@@ -0,0 +1,13 @@
+# Basic usage completed!
+
+Great job following the course up to here! To recap, in this chapter you:
+
+- Learned the basic building blocks of a Transformer model.
+- Learned what makes up a tokenization pipeline.
+- Saw how to use a Transformer model in practice.
+- Learned how to leverage a tokenizer to convert text to tensors that are understandable by the model.
+- Set up a tokenizer and a model together to get from text to predictions.
+- Learned the limitations of input IDs, and learned about attention masks.
+- Played around with versatile and configurable tokenizer methods.
+
+From now on, you should be able to freely navigate the 🤗 Transformers docs: the vocabulary will sound familiar, and you've already seen the methods that you'll use the majority of the time.
diff --git a/8.mdx b/8.mdx
new file mode 100644
index 000000000..43f0a8c9c
--- /dev/null
+++ b/8.mdx
@@ -0,0 +1,305 @@
+<FrameworkSwitchCourse {fw} />
+
+<!-- DISABLE-FRONTMATTER-SECTIONS -->
+
+# End-of-chapter quiz
+
+### 1. What is the order of the language modeling pipeline?
+
+<Question
+	choices={[
+		{
+			text: "First, the model, which handles text and returns raw predictions. The tokenizer then makes sense of these predictions and converts them back to text when needed.",
+			explain: "The model cannot understand text! The tokenizer must first tokenize the text and convert it to IDs so that it is understandable by the model."
+		},
+		{
+			text: "First, the tokenizer, which handles text and returns IDs. The model handles these IDs and outputs a prediction, which can be some text.",
+			explain: "The model's prediction cannot be text straight away. The tokenizer has to be used in order to convert the prediction back to text!"
+		},
+		{
+			text: "The tokenizer handles text and returns IDs. The model handles these IDs and outputs a prediction. The tokenizer can then be used once again to convert these predictions back to some text.",
+			explain: "Correct! The tokenizer can be used for both tokenizing and de-tokenizing.",
+            correct: true
+		}
+	]}
+/>
+
+### 2. How many dimensions does the tensor output by the base Transformer model have, and what are they?
+
+<Question
+	choices={[
+		{
+			text: "2: The sequence length and the batch size",
+			explain: "False! The tensor output by the model has a third dimension: hidden size."
+		},
+		{
+			text: "2: The sequence length and the hidden size",
+			explain: "False! All Transformer models handle batches, even with a single sequence; that would be a batch size of 1!"
+		},
+		{
+			text: "3: The sequence length, the batch size, and the hidden size",
+			explain: "Correct!",
+            correct: true
+		}
+	]}
+/>
+
+### 3. Which of the following is an example of subword tokenization?
+
+<Question
+	choices={[
+		{
+			text: "WordPiece",
+			explain: "Yes, that's one example of subword tokenization!",
+            correct: true
+		},
+		{
+			text: "Character-based tokenization",
+			explain: "Character-based tokenization is not a type of subword tokenization."
+		},
+		{
+			text: "Splitting on whitespace and punctuation",
+			explain: "That's a word-based tokenization scheme!"
+		},
+		{
+			text: "BPE",
+			explain: "Yes, that's one example of subword tokenization!",
+            correct: true
+        },
+		{
+			text: "Unigram",
+			explain: "Yes, that's one example of subword tokenization!",
+            correct: true
+        },
+		{
+			text: "None of the above",
+			explain: "Incorrect!"
+        }
+	]}
+/>
+
+### 4. What is a model head?
+
+<Question
+	choices={[
+		{
+			text: "A component of the base Transformer network that redirects tensors to their correct layers",
+			explain: "Incorrect! There's no such component."
+		},
+		{
+			text: "Also known as the self-attention mechanism, it adapts the representation of a token according to the other tokens of the sequence",
+			explain: "Incorrect! The self-attention layer does contain attention \"heads,\" but these are not adaptation heads."
+		},
+		{
+			text: "An additional component, usually made up of one or a few layers, to convert the transformer predictions to a task-specific output",
+			explain: "That's right. Adaptation heads, also known simply as heads, come up in different forms: language modeling heads, question answering heads, sequence classification heads... ",
+			correct: true
+		} 
+	]}
+/>
+
+{#if fw === 'pt'}
+### 5. What is an AutoModel?
+
+<Question
+	choices={[
+		{
+			text: "A model that automatically trains on your data",
+			explain: "Incorrect. Are you mistaking this for our <a href='https://huggingface.co/autonlp'>AutoNLP</a> product?"
+		},
+		{
+			text: "An object that returns the correct architecture based on the checkpoint",
+			explain: "Exactly: the <code>AutoModel</code> only needs to know the checkpoint from which to initialize to return the correct architecture.",
+			correct: true
+		},
+		{
+			text: "A model that automatically detects the language used for its inputs to load the correct weights",
+			explain: "Incorrect; while some checkpoints and models are capable of handling multiple languages, there are no built-in tools for automatic checkpoint selection according to language. You should head over to the <a href='https://huggingface.co/models'>Model Hub</a> to find the best checkpoint for your task!"
+		} 
+	]}
+/>
+
+{:else}
+### 5. What is a TFAutoModel?
+
+<Question
+	choices={[
+		{
+			text: "A model that automatically trains on your data",
+			explain: "Incorrect. Are you mistaking this for our <a href='https://huggingface.co/autonlp'>AutoNLP</a> product?"
+		},
+		{
+			text: "An object that returns the correct architecture based on the checkpoint",
+			explain: "Exactly: the <code>TFAutoModel</code> only needs to know the checkpoint from which to initialize to return the correct architecture.",
+			correct: true
+		},
+		{
+			text: "A model that automatically detects the language used for its inputs to load the correct weights",
+			explain: "Incorrect; while some checkpoints and models are capable of handling multiple languages, there are no built-in tools for automatic checkpoint selection according to language. You should head over to the <a href='https://huggingface.co/models'>Model Hub</a> to find the best checkpoint for your task!"
+		} 
+	]}
+/>
+
+{/if}
+
+### 6. What are the techniques to be aware of when batching sequences of different lengths together?
+
+<Question
+	choices={[
+		{
+			text: "Truncating",
+			explain: "Yes, truncation is a correct way of evening out sequences so that they fit in a rectangular shape. Is it the only one, though?",
+			correct: true
+		},
+		{
+			text: "Returning tensors",
+			explain: "While the other techniques allow you to return rectangular tensors, returning tensors isn't helpful when batching sequences together."
+		},
+		{
+			text: "Padding",
+			explain: "Yes, padding is a correct way of evening out sequences so that they fit in a rectangular shape. Is it the only one, though?",
+			correct: true
+		}, 
+		{
+			text: "Attention masking",
+			explain: "Absolutely! Attention masks are of prime importance when handling sequences of different lengths. That's not the only technique to be aware of, however.",
+			correct: true
+		} 
+	]}
+/>
+
+### 7. What is the point of applying a SoftMax function to the logits output by a sequence classification model?
+
+<Question
+	choices={[
+		{
+			text: "It softens the logits so that they're more reliable.",
+			explain: "No, the SoftMax function does not affect the reliability of results."
+		},
+		{
+			text: "It applies a lower and upper bound so that they're understandable.",
+			explain: "Correct! The resulting values are bound between 0 and 1. That's not the only reason we use a SoftMax function, though.",
+            correct: true
+		},
+		{
+			text: "The total sum of the output is then 1, resulting in a possible probabilistic interpretation.",
+			explain: "Correct! That's not the only reason we use a SoftMax function, though.",
+            correct: true
+		}
+	]}
+/>
+
+### 8. What method is most of the tokenizer API centered around?
+
+<Question
+	choices={[
+		{
+			text: "<code>encode</code>, as it can encode text into IDs and IDs into predictions",
+			explain: "Wrong! While the <code>encode</code> method does exist on tokenizers, it does not exist on models."
+		},
+		{
+			text: "Calling the tokenizer object directly.",
+			explain: "Exactly! The <code>__call__</code> method of the tokenizer is a very powerful method which can handle pretty much anything. It is also the method used to retrieve predictions from a model.",
+			correct: true
+		},
+		{
+			text: "<code>pad</code>",
+			explain: "Wrong! Padding is very useful, but it's just one part of the tokenizer API."
+		},
+		{
+			text: "<code>tokenize</code>",
+			explain: "The <code>tokenize</code> method is arguably one of the most useful methods, but it isn't the core of the tokenizer API."
+		}
+	]}
+/>
+
+### 9. What does the `result` variable contain in this code sample?
+
+```py
+from transformers import AutoTokenizer
+
+tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
+result = tokenizer.tokenize("Hello!")
+```
+
+<Question
+	choices={[
+		{
+			text: "A list of strings, each string being a token",
+			explain: "Absolutely! Convert this to IDs, and send them to a model!",
+            correct: true
+		},
+		{
+			text: "A list of IDs",
+			explain: "Incorrect; that's what the <code>__call__</code> or <code>convert_tokens_to_ids</code> method is for!"
+		},
+		{
+			text: "A string containing all of the tokens",
+			explain: "This would be suboptimal, as the goal is to split the string into multiple tokens."
+		}
+	]}
+/>
+
+{#if fw === 'pt'}
+### 10. Is there something wrong with the following code?
+
+```py
+from transformers import AutoTokenizer, AutoModel
+
+tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
+model = AutoModel.from_pretrained("gpt2")
+
+encoded = tokenizer("Hey!", return_tensors="pt")
+result = model(**encoded)
+```
+
+<Question
+	choices={[
+		{
+			text: "No, it seems correct.",
+			explain: "Unfortunately, coupling a model with a tokenizer that was trained with a different checkpoint is rarely a good idea. The model was not trained to make sense out of this tokenizer's output, so the model output (if it can even run!) will not make any sense."
+		},
+		{
+			text: "The tokenizer and model should always be from the same checkpoint.",
+			explain: "Right!",
+            correct: true
+		},
+		{
+			text: "It's good practice to pad and truncate with the tokenizer as every input is a batch.",
+			explain: "It's true that every model input needs to be a batch. However, truncating or padding this sequence wouldn't necessarily make sense as there is only one of it, and those are techniques to batch together a list of sentences."
+		}
+	]}
+/>
+
+{:else}
+### 10. Is there something wrong with the following code?
+
+```py
+from transformers import AutoTokenizer, TFAutoModel
+
+tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
+model = TFAutoModel.from_pretrained("gpt2")
+
+encoded = tokenizer("Hey!", return_tensors="tf")
+result = model(**encoded)
+```
+
+<Question
+	choices={[
+		{
+			text: "No, it seems correct.",
+			explain: "Unfortunately, coupling a model with a tokenizer that was trained with a different checkpoint is rarely a good idea. The model was not trained to make sense out of this tokenizer's output, so the model output (if it can even run!) will not make any sense."
+		},
+		{
+			text: "The tokenizer and model should always be from the same checkpoint.",
+			explain: "Right!",
+            correct: true
+		},
+		{
+			text: "It's good practice to pad and truncate with the tokenizer as every input is a batch.",
+			explain: "It's true that every model input needs to be a batch. However, truncating or padding this sequence wouldn't necessarily make sense as there is only one of it, and those are techniques to batch together a list of sentences."
+		}
+	]}
+/>
+
+{/if}

From abdcbb7c3bd9eaea9924ca13c21632d995e92610 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 19:55:22 +0100
Subject: [PATCH 015/127] Update 1.mdx

---
 chapters/it/chapter0/1.mdx | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/chapters/it/chapter0/1.mdx b/chapters/it/chapter0/1.mdx
index 6ab7c8e23..b3e18a208 100644
--- a/chapters/it/chapter0/1.mdx
+++ b/chapters/it/chapter0/1.mdx
@@ -1,6 +1,6 @@
-# Introduction
+# Introduzione
 
-Welcome to the Hugging Face course! This introduction will guide you through setting up a working environment. If you're just starting the course, we recommend you first take a look at [Chapter 1](/course/chapter1), then come back and set up your environment so you can try the code yourself.
+Benvenuti e benvenute al corso di Hugging Face! In questo capitolo introduttivo, vi aiuteremo a creare un ambiente di lavoro. If you're just starting the course, we recommend you first take a look at [Capitolo 1](/course/chapter1), then come back and set up your environment so you can try the code yourself.
 
 All the libraries that we'll be using in this course are available as Python packages, so here we'll show you how to set up a Python environment and install the specific libraries you'll need.
 
@@ -10,7 +10,7 @@ Note that we will not be covering the Windows system. If you're running on Windo
 
 Most of the course relies on you having a Hugging Face account. We recommend creating one now: [create an account](https://huggingface.co/join).
 
-## Using a Google Colab notebook
+## Come usare un blocco note Colab di Google
 
 Using a Colab notebook is the simplest possible setup; boot up a notebook in your browser and get straight to coding! 
 
@@ -46,7 +46,7 @@ This installs a very light version of 🤗 Transformers. In particular, no speci
 
 This will take a bit of time, but then you'll be ready to go for the rest of the course!
 
-## Using a Python virtual environment
+## Come usare un ambiente virtuale in Python
 
 If you prefer to use a Python virtual environment, the first step is to install Python on your system. We recommend following [this guide](https://realpython.com/installing-python/) to get started.
 
@@ -99,7 +99,7 @@ which python
 /home/<user>/transformers-course/.env/bin/python
 ```
 
-### Installing dependencies
+### Installazione dipendenze
 
 As in the previous section on using Google Colab instances, you'll now need to install the packages required to continue. Again, you can install the development version of 🤗 Transformers using the `pip` package manager:
 

From 8b2ca12b5467cb8463db2a6dcf0d949c6008420f Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 20:20:09 +0100
Subject: [PATCH 016/127] Update 1.mdx

---
 chapters/it/chapter0/1.mdx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/chapters/it/chapter0/1.mdx b/chapters/it/chapter0/1.mdx
index b3e18a208..802716add 100644
--- a/chapters/it/chapter0/1.mdx
+++ b/chapters/it/chapter0/1.mdx
@@ -1,10 +1,10 @@
 # Introduzione
 
-Benvenuti e benvenute al corso di Hugging Face! In questo capitolo introduttivo, vi aiuteremo a creare un ambiente di lavoro. If you're just starting the course, we recommend you first take a look at [Capitolo 1](/course/chapter1), then come back and set up your environment so you can try the code yourself.
+Benvenuto/a al corso di Hugging Face! In questo capitolo introduttivo, ti aiuteremo a configurare il tuo ambiente di lavoro. Se non hai ancora cominciato il corso, ti consigliamo di iniziare col dare un occhio al [Capitolo 1](/course/chapter1), per poi tornare qui a creare il tuo ambiente e lavorare al codice.
 
-All the libraries that we'll be using in this course are available as Python packages, so here we'll show you how to set up a Python environment and install the specific libraries you'll need.
+Tutte le librerie che useremo in questo corso sono disponibili come pacchetti Python, quindi qui ti mostreremo dapprima come configurare un ambiente Python e in seguito come installare le librerie di cui avrai bisogno.
 
-We'll cover two ways of setting up your working environment, using a Colab notebook or a Python virtual environment. Feel free to choose the one that resonates with you the most. For beginners, we strongly recommend that you get started by using a Colab notebook.
+Copriremo due modi per configurare un ambiente di lavoro: usando un blocco note Colab oppure un ambiente virtuale in Python. Sentiti libero/a di scegliere quello che ti sembra più adatto a te. Se sei un/a principiante, ti consigliamo vivamente di cominciare con un blocco note Colab.
 
 Note that we will not be covering the Windows system. If you're running on Windows, we recommend following along using a Colab notebook. If you're using a Linux distribution or macOS, you can use either approach described here.
 

From 04cc9d76fe52d75463b5d3dbb73ab2d380950a09 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 20:31:13 +0100
Subject: [PATCH 017/127] Update 1.mdx

---
 chapters/it/chapter0/1.mdx | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/chapters/it/chapter0/1.mdx b/chapters/it/chapter0/1.mdx
index 802716add..87a6e853a 100644
--- a/chapters/it/chapter0/1.mdx
+++ b/chapters/it/chapter0/1.mdx
@@ -4,19 +4,19 @@ Benvenuto/a al corso di Hugging Face! In questo capitolo introduttivo, ti aiuter
 
 Tutte le librerie che useremo in questo corso sono disponibili come pacchetti Python, quindi qui ti mostreremo dapprima come configurare un ambiente Python e in seguito come installare le librerie di cui avrai bisogno.
 
-Copriremo due modi per configurare un ambiente di lavoro: usando un blocco note Colab oppure un ambiente virtuale in Python. Sentiti libero/a di scegliere quello che ti sembra più adatto a te. Se sei un/a principiante, ti consigliamo vivamente di cominciare con un blocco note Colab.
+Copriremo due modi per configurare un ambiente di lavoro: usando un blocco note Colab oppure un ambiente virtuale in Python. Sentiti libero/a di scegliere quello che ti sembra più adatto a te. Se sei un/a principiante, ti consigliamo vivamente di cominciare a lavorare con un blocco note Colab.
 
-Note that we will not be covering the Windows system. If you're running on Windows, we recommend following along using a Colab notebook. If you're using a Linux distribution or macOS, you can use either approach described here.
+Nota che non copriremo Windows. Se utilizzi Windows come sistema operativo, il nostro consiglio è di seguire il corso utilizzando un blocco note Colab. Se invece utilizzi Linux oppure macOS, puoi scegliere uno qualsiasi degli approcci descritti qui in seguito.
 
-Most of the course relies on you having a Hugging Face account. We recommend creating one now: [create an account](https://huggingface.co/join).
+Buona parte del corso richiede un account di Hugging Face. Ti consigliamo dunque di crearne uno al più presto: [create an account](https://huggingface.co/join).
 
 ## Come usare un blocco note Colab di Google
 
-Using a Colab notebook is the simplest possible setup; boot up a notebook in your browser and get straight to coding! 
+Il modo più semplice per configurare il tuo ambiente di lavoro è utilizzando Google Colab: una volta avviato un blocco note nel browser, puoi iniziare immediatamente a programmare! 
 
 If you're not familiar with Colab, we recommend you start by following the [introduction](https://colab.research.google.com/notebooks/intro.ipynb). Colab allows you to use some accelerating hardware, like GPUs or TPUs, and it is free for smaller workloads.
 
-Once you're comfortable moving around in Colab, create a new notebook and get started with the setup:
+Quando ti sentirai a tuo agio con Colab, crea un nuovo blocco note e inizia la configurazione:
 
 <div class="flex justify-center">
 <img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter0/new_colab.png" alt="An empty colab notebook" width="80%"/>

From c9372958443c206a6058661f9319d10e3fdd7789 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 20:44:33 +0100
Subject: [PATCH 018/127] Update 1.mdx

---
 chapters/it/chapter0/1.mdx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/chapters/it/chapter0/1.mdx b/chapters/it/chapter0/1.mdx
index 87a6e853a..665a61eb5 100644
--- a/chapters/it/chapter0/1.mdx
+++ b/chapters/it/chapter0/1.mdx
@@ -8,13 +8,13 @@ Copriremo due modi per configurare un ambiente di lavoro: usando un blocco note
 
 Nota che non copriremo Windows. Se utilizzi Windows come sistema operativo, il nostro consiglio è di seguire il corso utilizzando un blocco note Colab. Se invece utilizzi Linux oppure macOS, puoi scegliere uno qualsiasi degli approcci descritti qui in seguito.
 
-Buona parte del corso richiede un account di Hugging Face. Ti consigliamo dunque di crearne uno al più presto: [create an account](https://huggingface.co/join).
+Buona parte del corso richiede un account di Hugging Face. Ti consigliamo dunque di crearne uno al più presto: [Crea un account](https://huggingface.co/join).
 
 ## Come usare un blocco note Colab di Google
 
 Il modo più semplice per configurare il tuo ambiente di lavoro è utilizzando Google Colab: una volta avviato un blocco note nel browser, puoi iniziare immediatamente a programmare! 
 
-If you're not familiar with Colab, we recommend you start by following the [introduction](https://colab.research.google.com/notebooks/intro.ipynb). Colab allows you to use some accelerating hardware, like GPUs or TPUs, and it is free for smaller workloads.
+Se non conosci bene Colab, ti raccomandiamo di iniziare dalla seguente [introduzione](https://colab.research.google.com/notebooks/intro.ipynb). Colab permette di utilizzare accelerazioni hardware come GPU o TPU, ed è gratuito per i carichi di lavoro più piccoli.
 
 Quando ti sentirai a tuo agio con Colab, crea un nuovo blocco note e inizia la configurazione:
 
@@ -22,7 +22,7 @@ Quando ti sentirai a tuo agio con Colab, crea un nuovo blocco note e inizia la c
 <img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter0/new_colab.png" alt="An empty colab notebook" width="80%"/>
 </div>
 
-The next step is to install the libraries that we'll be using in this course. We'll use `pip` for the installation, which is the package manager for Python. In notebooks, you can run system commands by preceding them with the `!` character, so you can install the 🤗 Transformers library as follows:
+Il passo successivo consiste nell'installare le librerie che utilizzerai in questo corso. Per l'installazione, useremo `pip`, ossia il gestore di pacchetti di Python. In Google Colab, puoi inizializzare i tuoi comandi di sistema facendone precedere il nome dal carattere `!`. La libreria Transformers di 🤗 verrà quindi installata come segue:
 
 ```
 !pip install transformers

From 78fa66f99f81376291f9b459a52e399476976fea Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 21:08:31 +0100
Subject: [PATCH 019/127] Update 1.mdx

---
 chapters/it/chapter0/1.mdx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/chapters/it/chapter0/1.mdx b/chapters/it/chapter0/1.mdx
index 665a61eb5..0aa024aab 100644
--- a/chapters/it/chapter0/1.mdx
+++ b/chapters/it/chapter0/1.mdx
@@ -22,13 +22,13 @@ Quando ti sentirai a tuo agio con Colab, crea un nuovo blocco note e inizia la c
 <img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter0/new_colab.png" alt="An empty colab notebook" width="80%"/>
 </div>
 
-Il passo successivo consiste nell'installare le librerie che utilizzerai in questo corso. Per l'installazione, useremo `pip`, ossia il gestore di pacchetti di Python. In Google Colab, puoi inizializzare i tuoi comandi di sistema facendone precedere il nome dal carattere `!`. La libreria Transformers di 🤗 verrà quindi installata come segue:
+Il passo successivo consiste nell'installare le librerie che utilizzerai in questo corso. Per l'installazione, useremo `pip`, ossia il gestore di pacchetti di Python. In Google Colab, puoi inizializzare i tuoi comandi di sistema facendone precedere il nome dal carattere `!`. La libreria Transformer di 🤗 verrà quindi installata come segue:
 
 ```
 !pip install transformers
 ```
 
-You can make sure the package was correctly installed by importing it within your Python runtime:
+Puoi assicurarti che il pacchetto sia stato installato correttamente importandolo nel tuo runtime in Python:
 
 ```
 import transformers
@@ -38,7 +38,7 @@ import transformers
 <img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter0/install.gif" alt="A gif showing the result of the two commands above: installation and import" width="80%"/>
 </div>
 
-This installs a very light version of 🤗 Transformers. In particular, no specific machine learning frameworks (like PyTorch or TensorFlow) are installed. Since we'll be using a lot of different features of the library, we recommend installing the development version, which comes with all the required dependencies for pretty much any imaginable use case:
+Questa operazione installa una versione molto leggera dei Transformer 🤗 che non importa nessun quadro strutturale (framework) di apprendimento automatico (come PyTorch o TensorFlow). Dato che useremo molte caratteristiche (features) diverse della libreria, raccomandiamo l'installazione della versione per sviluppatori, la quale contiene praticamente tutte le dipendenze possibili e immaginabili:
 
 ```
 !pip install transformers[sentencepiece]

From 4ff746bc0b11fd23bc104c81516f93717dd93f9f Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 21:11:01 +0100
Subject: [PATCH 020/127] Update 1.mdx

---
 chapters/it/chapter0/1.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/chapter0/1.mdx b/chapters/it/chapter0/1.mdx
index 0aa024aab..6c8fe96e3 100644
--- a/chapters/it/chapter0/1.mdx
+++ b/chapters/it/chapter0/1.mdx
@@ -44,7 +44,7 @@ Questa operazione installa una versione molto leggera dei Transformer 🤗 che n
 !pip install transformers[sentencepiece]
 ```
 
-This will take a bit of time, but then you'll be ready to go for the rest of the course!
+Quest'operazione richiederà un po' di tempo, ma poi sarai pronto/a per il tutto resto del corso!
 
 ## Come usare un ambiente virtuale in Python
 

From 4ea420e06147f1703cdb2db8d0c37f533d6632ae Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 21:37:48 +0100
Subject: [PATCH 021/127] Update 1.mdx

---
 chapters/it/chapter0/1.mdx | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/chapters/it/chapter0/1.mdx b/chapters/it/chapter0/1.mdx
index 6c8fe96e3..0d5d789fc 100644
--- a/chapters/it/chapter0/1.mdx
+++ b/chapters/it/chapter0/1.mdx
@@ -38,38 +38,38 @@ import transformers
 <img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter0/install.gif" alt="A gif showing the result of the two commands above: installation and import" width="80%"/>
 </div>
 
-Questa operazione installa una versione molto leggera dei Transformer 🤗 che non importa nessun quadro strutturale (framework) di apprendimento automatico (come PyTorch o TensorFlow). Dato che useremo molte caratteristiche (features) diverse della libreria, raccomandiamo l'installazione della versione per sviluppatori, la quale contiene praticamente tutte le dipendenze possibili e immaginabili:
+Quest'operazione installa una versione molto leggera dei Transformer 🤗 che non importa nessun quadro strutturale (framework) di apprendimento automatico (come PyTorch o TensorFlow). Dato che useremo molte caratteristiche (features) diverse della libreria, raccomandiamo l'installazione della versione per sviluppatori, la quale contiene praticamente tutte le dipendenze possibili e immaginabili:
 
 ```
 !pip install transformers[sentencepiece]
 ```
 
-Quest'operazione richiederà un po' di tempo, ma poi sarai pronto/a per il tutto resto del corso!
+L'operazione richiederà un po' di tempo, ma poi sarai pronto/a per tutto il resto del corso!
 
 ## Come usare un ambiente virtuale in Python
 
-If you prefer to use a Python virtual environment, the first step is to install Python on your system. We recommend following [this guide](https://realpython.com/installing-python/) to get started.
+Se preferisci utilizzare un ambiente virtuale in Python, il primo passo consiste nell'installazione di Python nel tuo sistema. Ti raccomandiamo di aiutarti con [questa guida](https://realpython.com/installing-python/).
 
-Once you have Python installed, you should be able to run Python commands in your terminal. You can start by running the following command to ensure that it is correctly installed before proceeding to the next steps: `python --version`. This should print out the Python version now available on your system.
+Quando avrai installato Python, dovresti riuscire a eseguire qualsiasi comando in Python sul terminale. Prima di procedere ai passi successivi, prova a eseguire il seguente comando per assicurarti che Python sia installato correttamente: `python --version`. Il comando dovrebbe stampare il nome della versione di Python installata nella tua macchina.
 
 When running a Python command in your terminal, such as `python --version`, you should think of the program running your command as the "main" Python on your system. We recommend keeping this main installation free of any packages, and using it to create separate environments for each application you work on — this way, each application can have its own dependencies and packages, and you won't need to worry about potential compatibility issues with other applications.
 
 In Python this is done with [*virtual environments*](https://docs.python.org/3/tutorial/venv.html), which are self-contained directory trees that each contain a Python installation with a particular Python version alongside all the packages the application needs. Creating such a virtual environment can be done with a number of different tools, but we'll use the official Python package for that purpose, which is called [`venv`](https://docs.python.org/3/library/venv.html#module-venv).
 
-First, create the directory you'd like your application to live in — for example, you might want to make a new directory called *transformers-course* at the root of your home directory:
+Innanzitutto, crea la cartella che ospiterà l'applicazione in questione, come ad esempio una cartella di nome *transformers-course* alla radice della tua home directory:
 
 ```
 mkdir ~/transformers-course
 cd ~/transformers-course
 ```
 
-From inside this directory, create a virtual environment using the Python `venv` module:
+All'interno di questa cartella, crea un ambiente virtuale utilizzando il modulo `venv` di Python:
 
 ```
 python -m venv .env
 ```
 
-You should now have a directory called *.env* in your otherwise empty folder:
+A questo punto, dovresti avere una cartella chiamata *.env* in quella che era la tua cartella vuota:
 
 ```
 ls -a
@@ -79,17 +79,17 @@ ls -a
 .      ..    .env
 ```
 
-You can jump in and out of your virtual environment with the `activate` and `deactivate` scripts:
+Puoi entrare e uscire dall'ambiente virtuale utilizzando gli script `activate` e `deactivate`:
 
 ```
-# Activate the virtual environment
+# Attiva l'ambiente virtuale
 source .env/bin/activate
 
-# Deactivate the virtual environment
+# Disattiva l'ambiente virtuale
 source .env/bin/deactivate
 ```
 
-You can make sure that the environment is activated by running the `which python` command: if it points to the virtual environment, then you have successfully activated it!
+Assicurati che l'ambiente sia configurato correttamente eseguendo il comando `which python`: se come risposta ottieni l'ambiente virtuale, significa che l'hai attivato bene!
 
 ```
 which python
@@ -101,10 +101,10 @@ which python
 
 ### Installazione dipendenze
 
-As in the previous section on using Google Colab instances, you'll now need to install the packages required to continue. Again, you can install the development version of 🤗 Transformers using the `pip` package manager:
+Come già menzionato nella sezione su Google Colab, il passo successivo consiste nell'installazione dei pacchetti richiesti dal corso. Ancora una volta, ti chiediamo di installare la versione per sviluppatori dei Transformer di 🤗 utilizzando il gestore di pacchetti `pip`:
 
 ```
 pip install "transformers[sentencepiece]"
 ```
 
-You're now all set up and ready to go!
+Abbiamo finito con le installazioni! Ora sei pronto/a a iniziare.

From 1b087c1716131c1ef6ad32f2f2911395cf103b7d Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 29 Mar 2022 21:57:30 +0100
Subject: [PATCH 022/127] Update 1.mdx

---
 chapters/it/chapter0/1.mdx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chapters/it/chapter0/1.mdx b/chapters/it/chapter0/1.mdx
index 0d5d789fc..d73030b5a 100644
--- a/chapters/it/chapter0/1.mdx
+++ b/chapters/it/chapter0/1.mdx
@@ -52,9 +52,9 @@ Se preferisci utilizzare un ambiente virtuale in Python, il primo passo consiste
 
 Quando avrai installato Python, dovresti riuscire a eseguire qualsiasi comando in Python sul terminale. Prima di procedere ai passi successivi, prova a eseguire il seguente comando per assicurarti che Python sia installato correttamente: `python --version`. Il comando dovrebbe stampare il nome della versione di Python installata nella tua macchina.
 
-When running a Python command in your terminal, such as `python --version`, you should think of the program running your command as the "main" Python on your system. We recommend keeping this main installation free of any packages, and using it to create separate environments for each application you work on — this way, each application can have its own dependencies and packages, and you won't need to worry about potential compatibility issues with other applications.
+Quando esegui un comando in Python dal terminale, come ad esempio `python --version`, ti consigliamo di considerare il programma che esegue il tuo comando l'installazione "principale" di Python del tuo sistema. La nostra raccomandazione è di tenere questa installazione principale libera da pacchetti di ogni tipo, e di usarla per creare ambienti diversi per ogni applicazione alla quale lavorerai. In questo modo, ogni applicazione avrà le proprie dipendenze e i propri pacchetti, e non dovrai preoccuparti di eventiali problemi di compatibilità con altre applicazioni.
 
-In Python this is done with [*virtual environments*](https://docs.python.org/3/tutorial/venv.html), which are self-contained directory trees that each contain a Python installation with a particular Python version alongside all the packages the application needs. Creating such a virtual environment can be done with a number of different tools, but we'll use the official Python package for that purpose, which is called [`venv`](https://docs.python.org/3/library/venv.html#module-venv).
+In Python, questa operazione si effettua utilizzando gli [*ambienti virtuali*](https://docs.python.org/3/tutorial/venv.html). Questi ultimi sono  degli alberi di directory autonomi che contengono installazioni di Python diverse, ossia particolari versioni di Python unite a tutti i pacchetti richiesti da una certa applicazione. La creazione di ambienti virtuali di questo tipo si può attuare a mezzo di strumenti diversi, anche se qui useremo esclusivamente il pacchetto ufficiale di Python, [`venv`](https://docs.python.org/3/library/venv.html#module-venv).
 
 Innanzitutto, crea la cartella che ospiterà l'applicazione in questione, come ad esempio una cartella di nome *transformers-course* alla radice della tua home directory:
 

From aa62d24549683b49a8947404befb1bd620753027 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 31 Mar 2022 23:06:58 +0100
Subject: [PATCH 023/127] Update 1.mdx

---
 chapters/it/chapter1/1.mdx | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/chapters/it/chapter1/1.mdx b/chapters/it/chapter1/1.mdx
index b3f26714a..a2cc4bb14 100644
--- a/chapters/it/chapter1/1.mdx
+++ b/chapters/it/chapter1/1.mdx
@@ -1,15 +1,15 @@
-# Introduction
+# Introduzione
 
-## Welcome to the 🤗 Course!
+## Benvenuto/a al corso di 🤗!
 
 <Youtube id="00GKzGyWFEs" />
 
-This course will teach you about natural language processing (NLP) using libraries from the [Hugging Face](https://huggingface.co/) ecosystem — [🤗 Transformers](https://github.com/huggingface/transformers), [🤗 Datasets](https://github.com/huggingface/datasets), [🤗 Tokenizers](https://github.com/huggingface/tokenizers), and [🤗 Accelerate](https://github.com/huggingface/accelerate) — as well as the [Hugging Face Hub](https://huggingface.co/models). It's completely free and without ads.
+Questo corso ti insegnerà a eseguire compiti di trattamento automatico del linguaggio (TAL) utilizzando le librerie dell'ecosistema di [Hugging Face](https://huggingface.co/): [🤗 Transformers](https://github.com/huggingface/transformers), [🤗 Datasets](https://github.com/huggingface/datasets), [🤗 Tokenizers](https://github.com/huggingface/tokenizers), e [🤗 Accelerate](https://github.com/huggingface/accelerate). Ti insegneremo anche ad usare il nostro [Hugging Face Hub](https://huggingface.co/models), che è completamente gratuito e senza pubblicità.
 
 
-## What to expect?
+## Contenuti
 
-Here is a brief overview of the course:
+Eccoti un breve riassunto dei contenuti del corso:
 
 <div class="flex justify-center">
 <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/summary.svg" alt="Brief overview of the chapters of the course.">
@@ -20,17 +20,17 @@ Here is a brief overview of the course:
 - Chapters 5 to 8 teach the basics of 🤗 Datasets and 🤗 Tokenizers before diving into classic NLP tasks. By the end of this part, you will be able to tackle the most common NLP problems by yourself.
 - Chapters 9 to 12 go beyond NLP, and explore how Transformer models can be used tackle tasks in speech processing and computer vision. Along the way, you'll learn how to build and share demos of your models, and optimize them for production environments. By the end of this part, you will be ready to apply 🤗 Transformers to (almost) any machine learning problem!
 
-This course:
+Questo corso:
 
-* Requires a good knowledge of Python
-* Is better taken after an introductory deep learning course, such as [fast.ai's](https://www.fast.ai/) [Practical Deep Learning for Coders](https://course.fast.ai/) or one of the programs developed by [DeepLearning.AI](https://www.deeplearning.ai/)
-* Does not expect prior [PyTorch](https://pytorch.org/) or [TensorFlow](https://www.tensorflow.org/) knowledge, though some familiarity with either of those will help
+* Richiede una buona conoscenza di Python
+* Andrebbe seguito di preferenza a seguito di un corso introduttivo di *deep learning* (apprendimento profondo), come ad esempio [fast.ai's](https://www.fast.ai/) [Practical Deep Learning for Coders](https://course.fast.ai/) oppure uno dei programmi sviluppati da [DeepLearning.AI](https://www.deeplearning.ai/)
+* Non richiede conoscenze pregresse di [PyTorch](https://pytorch.org/) o [TensorFlow](https://www.tensorflow.org/), nonostante sia gradita una conoscienza anche superficiale dell'uno o dell'altro
 
-After you've completed this course, we recommend checking out DeepLearning.AI's [Natural Language Processing Specialization](https://www.coursera.org/specializations/natural-language-processing?utm_source=deeplearning-ai&utm_medium=institutions&utm_campaign=20211011-nlp-2-hugging_face-page-nlp-refresh), which covers a wide range of traditional NLP models like naive Bayes and LSTMs that are well worth knowing about!
+Quando avrai completato questo corso, ti raccomandiamo di passare al [Natural Language Processing Specialization](https://www.coursera.org/specializations/natural-language-processing?utm_source=deeplearning-ai&utm_medium=institutions&utm_campaign=20211011-nlp-2-hugging_face-page-nlp-refresh) di DeepLearning.AI, which covers a wide range of traditional NLP models like naive Bayes and LSTMs that are well worth knowing about!
 
-## Who are we?
+## Chi siamo?
 
-About the authors:
+A proposito degli autori:
 
 **Matthew Carrigan** is a Machine Learning Engineer at Hugging Face. He lives in Dublin, Ireland and previously worked as an ML engineer at Parse.ly and before that as a post-doctoral researcher at Trinity College Dublin. He does not believe we're going to get to AGI by scaling existing architectures, but has high hopes for robot immortality regardless.
 

From bf39db908adecd49aec28230f9aa51293b871431 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Fri, 1 Apr 2022 09:47:13 +0100
Subject: [PATCH 024/127] Update 1.mdx

---
 chapters/it/chapter0/1.mdx | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/chapters/it/chapter0/1.mdx b/chapters/it/chapter0/1.mdx
index d73030b5a..e8f7c5bf6 100644
--- a/chapters/it/chapter0/1.mdx
+++ b/chapters/it/chapter0/1.mdx
@@ -2,9 +2,9 @@
 
 Benvenuto/a al corso di Hugging Face! In questo capitolo introduttivo, ti aiuteremo a configurare il tuo ambiente di lavoro. Se non hai ancora cominciato il corso, ti consigliamo di iniziare col dare un occhio al [Capitolo 1](/course/chapter1), per poi tornare qui a creare il tuo ambiente e lavorare al codice.
 
-Tutte le librerie che useremo in questo corso sono disponibili come pacchetti Python, quindi qui ti mostreremo dapprima come configurare un ambiente Python e in seguito come installare le librerie di cui avrai bisogno.
+Tutte le librerie che useremo in questo corso sono disponibili come pacchetti Python, e qui ti mostreremo dapprima come configurare un ambiente Python e in seguito come installare le librerie di cui avrai bisogno.
 
-Copriremo due modi per configurare un ambiente di lavoro: usando un blocco note Colab oppure un ambiente virtuale in Python. Sentiti libero/a di scegliere quello che ti sembra più adatto a te. Se sei un/a principiante, ti consigliamo vivamente di cominciare a lavorare con un blocco note Colab.
+Copriremo due modi per configurare un ambiente di lavoro: usando un blocco note Colab, oppure un ambiente virtuale in Python. Sentiti libero/a di scegliere quello che ti sembra più adatto a te. Se sei un/a principiante, ti consigliamo vivamente di cominciare a lavorare con un blocco note Colab.
 
 Nota che non copriremo Windows. Se utilizzi Windows come sistema operativo, il nostro consiglio è di seguire il corso utilizzando un blocco note Colab. Se invece utilizzi Linux oppure macOS, puoi scegliere uno qualsiasi degli approcci descritti qui in seguito.
 
@@ -38,7 +38,7 @@ import transformers
 <img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter0/install.gif" alt="A gif showing the result of the two commands above: installation and import" width="80%"/>
 </div>
 
-Quest'operazione installa una versione molto leggera dei Transformer 🤗 che non importa nessun quadro strutturale (framework) di apprendimento automatico (come PyTorch o TensorFlow). Dato che useremo molte caratteristiche (features) diverse della libreria, raccomandiamo l'installazione della versione per sviluppatori, la quale contiene praticamente tutte le dipendenze possibili e immaginabili:
+Quest'operazione installa una versione molto leggera dei Transformer di 🤗 che non importa nessun quadro strutturale (*framework*) di apprendimento automatico (*machine learning*), come ad esempio PyTorch o TensorFlow. Dato che useremo numerose caratteristiche (*features*) della libreria, ti raccomandiamo l'installazione della versione per sviluppatori. Questa contiene praticamente tutte le dipendenze possibili e immaginabili:
 
 ```
 !pip install transformers[sentencepiece]
@@ -52,15 +52,15 @@ Se preferisci utilizzare un ambiente virtuale in Python, il primo passo consiste
 
 Quando avrai installato Python, dovresti riuscire a eseguire qualsiasi comando in Python sul terminale. Prima di procedere ai passi successivi, prova a eseguire il seguente comando per assicurarti che Python sia installato correttamente: `python --version`. Il comando dovrebbe stampare il nome della versione di Python installata nella tua macchina.
 
-Quando esegui un comando in Python dal terminale, come ad esempio `python --version`, ti consigliamo di considerare il programma che esegue il tuo comando l'installazione "principale" di Python del tuo sistema. La nostra raccomandazione è di tenere questa installazione principale libera da pacchetti di ogni tipo, e di usarla per creare ambienti diversi per ogni applicazione alla quale lavorerai. In questo modo, ogni applicazione avrà le proprie dipendenze e i propri pacchetti, e non dovrai preoccuparti di eventiali problemi di compatibilità con altre applicazioni.
+Quando esegui un comando in Python dal terminale, come ad esempio `python --version`, ti consigliamo di considerare il programma che esegue il tuo comando l'installazione "principale" di Python del tuo sistema. La nostra raccomandazione è di tenere quest'installazione principale libera da pacchetti di ogni tipo, e di usarla per creare ambienti diversi per ogni applicazione alla quale lavorerai. In questo modo, ogni applicazione avrà le proprie dipendenze e i propri pacchetti, e non dovrai preoccuparti di eventuali problemi di compatibilità con altre applicazioni.
 
-In Python, questa operazione si effettua utilizzando gli [*ambienti virtuali*](https://docs.python.org/3/tutorial/venv.html). Questi ultimi sono  degli alberi di directory autonomi che contengono installazioni di Python diverse, ossia particolari versioni di Python unite a tutti i pacchetti richiesti da una certa applicazione. La creazione di ambienti virtuali di questo tipo si può attuare a mezzo di strumenti diversi, anche se qui useremo esclusivamente il pacchetto ufficiale di Python, [`venv`](https://docs.python.org/3/library/venv.html#module-venv).
+In Python, quest'operazione si effettua utilizzando gli [*ambienti virtuali*](https://docs.python.org/3/tutorial/venv.html) (*virtual environments*). Questi ultimi sono degli alberi di directory autonomi che contengono installazioni di Python diverse, ossia particolari versioni di Python unite a tutti i pacchetti richiesti da una certa applicazione. La creazione di ambienti virtuali di questo tipo si può attuare a mezzo di strumenti diversi, anche se qui useremo esclusivamente il pacchetto ufficiale di Python, [`venv`](https://docs.python.org/3/library/venv.html#module-venv).
 
-Innanzitutto, crea la cartella che ospiterà l'applicazione in questione, come ad esempio una cartella di nome *transformers-course* alla radice della tua home directory:
+Innanzitutto, crea la cartella che ospiterà l'applicazione in questione, come ad esempio una cartella di nome *corso-transformers* alla radice della tua home directory:
 
 ```
-mkdir ~/transformers-course
-cd ~/transformers-course
+mkdir ~/corso-transformers
+cd ~/corso-transformers
 ```
 
 All'interno di questa cartella, crea un ambiente virtuale utilizzando il modulo `venv` di Python:
@@ -96,7 +96,7 @@ which python
 ```
 
 ```out
-/home/<user>/transformers-course/.env/bin/python
+/home/<user>/corso-transformers/.env/bin/python
 ```
 
 ### Installazione dipendenze

From 6a2381fc10493c84b745819db1bd0b3ddb7ef37f Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Sat, 2 Apr 2022 22:29:22 +0100
Subject: [PATCH 025/127] Corrects errors and adds translation

---
 chapters/it/chapter1/1.mdx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/chapters/it/chapter1/1.mdx b/chapters/it/chapter1/1.mdx
index a2cc4bb14..ef1f8d0f8 100644
--- a/chapters/it/chapter1/1.mdx
+++ b/chapters/it/chapter1/1.mdx
@@ -4,7 +4,7 @@
 
 <Youtube id="00GKzGyWFEs" />
 
-Questo corso ti insegnerà a eseguire compiti di trattamento automatico del linguaggio (TAL) utilizzando le librerie dell'ecosistema di [Hugging Face](https://huggingface.co/): [🤗 Transformers](https://github.com/huggingface/transformers), [🤗 Datasets](https://github.com/huggingface/datasets), [🤗 Tokenizers](https://github.com/huggingface/tokenizers), e [🤗 Accelerate](https://github.com/huggingface/accelerate). Ti insegneremo anche ad usare il nostro [Hugging Face Hub](https://huggingface.co/models), che è completamente gratuito e senza pubblicità.
+Questo corso ti insegnerà a eseguire compiti di elaborazione del linguaggio naturale (*Natural Language Processing*, NLP) utilizzando le librerie dell'ecosistema di [Hugging Face](https://huggingface.co/): [Transformers di 🤗](https://github.com/huggingface/transformers), [Datasets di 🤗](https://github.com/huggingface/datasets), [Tokenizers di 🤗](https://github.com/huggingface/tokenizers), e [Accelerate di 🤗](https://github.com/huggingface/accelerate). Ti insegneremo anche ad usare il nostro [Hugging Face Hub](https://huggingface.co/models), che è completamente gratuito e senza pubblicità.
 
 
 ## Contenuti
@@ -16,9 +16,9 @@ Eccoti un breve riassunto dei contenuti del corso:
 <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/summary-dark.svg" alt="Brief overview of the chapters of the course.">
 </div>
 
-- Chapters 1 to 4 provide an introduction to the main concepts of the 🤗 Transformers library. By the end of this part of the course, you will be familiar with how Transformer models work and will know how to use a model from the [Hugging Face Hub](https://huggingface.co/models), fine-tune it on a dataset, and share your results on the Hub!
-- Chapters 5 to 8 teach the basics of 🤗 Datasets and 🤗 Tokenizers before diving into classic NLP tasks. By the end of this part, you will be able to tackle the most common NLP problems by yourself.
-- Chapters 9 to 12 go beyond NLP, and explore how Transformer models can be used tackle tasks in speech processing and computer vision. Along the way, you'll learn how to build and share demos of your models, and optimize them for production environments. By the end of this part, you will be ready to apply 🤗 Transformers to (almost) any machine learning problem!
+- I capitoli da 1 a 4 forniscono un'introduzione ai concetti principali della libreria Transformers di 🤗. Alla fine di questa parte del corso, conoscerai come funzionano i modelli Transformers e saprai come utilizzare un modello della [Hugging Face Hub](https://huggingface.co/models), ottimizzarlo in un dataset, e condividere i tuoi risultati nel Hub!
+- I capitoli da 5 a 8 insegnano le basi dei Datasets di 🤗 e dei Tokenisers di 🤗, per poi esplorare alcuni compiti classici di NLP. Alla fine di questa parte, saprai far fronte ai problemi di NLP più comuni in maniera autonoma.
+- I capitoli da 9 a 12 vanno oltre il NLP, ed esplorano come i modelli Transformer possano essere utilizzati per affrontare compiti di elaborazione vocale o visione artificiale. Strada facendo, imparerai a costruire e condividere dimostrazioni (*demo*) dei tuoi modelli, e ad ottimizzarli per la produzione. Alla fine di questa parte, sarai pronto ad utilizzare i Transformers di 🤗 per qualsiasi problema di apprendimento automatico, o quasi!
 
 Questo corso:
 

From 5f64fa67ff120d5d2623ddf6cedf1573b08fa97f Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 5 Apr 2022 22:31:27 +0100
Subject: [PATCH 026/127] Adds paragraph

---
 chapters/it/chapter1/1.mdx | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/chapters/it/chapter1/1.mdx b/chapters/it/chapter1/1.mdx
index ef1f8d0f8..30026adf4 100644
--- a/chapters/it/chapter1/1.mdx
+++ b/chapters/it/chapter1/1.mdx
@@ -4,7 +4,7 @@
 
 <Youtube id="00GKzGyWFEs" />
 
-Questo corso ti insegnerà a eseguire compiti di elaborazione del linguaggio naturale (*Natural Language Processing*, NLP) utilizzando le librerie dell'ecosistema di [Hugging Face](https://huggingface.co/): [Transformers di 🤗](https://github.com/huggingface/transformers), [Datasets di 🤗](https://github.com/huggingface/datasets), [Tokenizers di 🤗](https://github.com/huggingface/tokenizers), e [Accelerate di 🤗](https://github.com/huggingface/accelerate). Ti insegneremo anche ad usare il nostro [Hugging Face Hub](https://huggingface.co/models), che è completamente gratuito e senza pubblicità.
+Questo corso ti insegnerà a eseguire compiti di elaborazione del linguaggio naturale (*Natural Language Processing*, NLP) utilizzando le librerie dell'ecosistema di [Hugging Face](https://huggingface.co/): [Transformer 🤗](https://github.com/huggingface/transformers), [Dataset 🤗](https://github.com/huggingface/datasets), [Tokenizer 🤗](https://github.com/huggingface/tokenizers), e [Accelerate 🤗](https://github.com/huggingface/accelerate). Ti insegneremo anche ad usare il nostro [Hugging Face Hub](https://huggingface.co/models), che è completamente gratuito e senza pubblicità.
 
 
 ## Contenuti
@@ -16,9 +16,9 @@ Eccoti un breve riassunto dei contenuti del corso:
 <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/summary-dark.svg" alt="Brief overview of the chapters of the course.">
 </div>
 
-- I capitoli da 1 a 4 forniscono un'introduzione ai concetti principali della libreria Transformers di 🤗. Alla fine di questa parte del corso, conoscerai come funzionano i modelli Transformers e saprai come utilizzare un modello della [Hugging Face Hub](https://huggingface.co/models), ottimizzarlo in un dataset, e condividere i tuoi risultati nel Hub!
-- I capitoli da 5 a 8 insegnano le basi dei Datasets di 🤗 e dei Tokenisers di 🤗, per poi esplorare alcuni compiti classici di NLP. Alla fine di questa parte, saprai far fronte ai problemi di NLP più comuni in maniera autonoma.
-- I capitoli da 9 a 12 vanno oltre il NLP, ed esplorano come i modelli Transformer possano essere utilizzati per affrontare compiti di elaborazione vocale o visione artificiale. Strada facendo, imparerai a costruire e condividere dimostrazioni (*demo*) dei tuoi modelli, e ad ottimizzarli per la produzione. Alla fine di questa parte, sarai pronto ad utilizzare i Transformers di 🤗 per qualsiasi problema di apprendimento automatico, o quasi!
+- I capitoli da 1 a 4 forniscono un'introduzione ai concetti principali della libreria Transformer 🤗. Alla fine di questa parte del corso, saprai come funzionano i modelli Transformer e come utilizzare un modello dell'[Hugging Face Hub](https://huggingface.co/models), ottimizzarlo su un dataset, e condividere i tuoi risultati nell'Hub!
+- I capitoli da 5 a 8 insegnano le basi dei Dataset 🤗 e dei Tokenizer 🤗, per poi esplorare alcuni compiti classici di NLP. Alla fine di questa parte, saprai far fronte ai problemi di NLP più comuni in maniera autonoma.
+- I capitoli da 9 a 12 vanno oltre il NLP, ed esplorano come i modelli Transformer possano essere utilizzati per affrontare compiti di elaborazione vocale o visione artificiale. Strada facendo, imparerai a costruire e condividere dimostrazioni (*demo*) dei tuoi modelli, e ad ottimizzarli per la produzione. Alla fine di questa parte, sarai pronto ad utilizzare i Transformer 🤗 per qualsiasi problema di apprendimento automatico, o quasi!
 
 Questo corso:
 
@@ -26,7 +26,7 @@ Questo corso:
 * Andrebbe seguito di preferenza a seguito di un corso introduttivo di *deep learning* (apprendimento profondo), come ad esempio [fast.ai's](https://www.fast.ai/) [Practical Deep Learning for Coders](https://course.fast.ai/) oppure uno dei programmi sviluppati da [DeepLearning.AI](https://www.deeplearning.ai/)
 * Non richiede conoscenze pregresse di [PyTorch](https://pytorch.org/) o [TensorFlow](https://www.tensorflow.org/), nonostante sia gradita una conoscienza anche superficiale dell'uno o dell'altro
 
-Quando avrai completato questo corso, ti raccomandiamo di passare al [Natural Language Processing Specialization](https://www.coursera.org/specializations/natural-language-processing?utm_source=deeplearning-ai&utm_medium=institutions&utm_campaign=20211011-nlp-2-hugging_face-page-nlp-refresh) di DeepLearning.AI, which covers a wide range of traditional NLP models like naive Bayes and LSTMs that are well worth knowing about!
+Quando avrai completato questo corso, ti raccomandiamo di passare al [Natural Language Processing Specialization](https://www.coursera.org/specializations/natural-language-processing?utm_source=deeplearning-ai&utm_medium=institutions&utm_campaign=20211011-nlp-2-hugging_face-page-nlp-refresh) di DeepLearning.AI, un corso che copre un ampio spettro di modelli tradizionali di NLP che vale davvero la pena di conoscere, come Naive Bayes e Memoria a breve termine a lungo termine (LSTM)!
 
 ## Chi siamo?
 

From 450ccfc77abcc819a42bf56f65cb0d236b485e92 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 5 Apr 2022 22:36:55 +0100
Subject: [PATCH 027/127] Adds last paragraph

---
 chapters/it/chapter1/1.mdx | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/chapters/it/chapter1/1.mdx b/chapters/it/chapter1/1.mdx
index 30026adf4..e35191d08 100644
--- a/chapters/it/chapter1/1.mdx
+++ b/chapters/it/chapter1/1.mdx
@@ -26,7 +26,7 @@ Questo corso:
 * Andrebbe seguito di preferenza a seguito di un corso introduttivo di *deep learning* (apprendimento profondo), come ad esempio [fast.ai's](https://www.fast.ai/) [Practical Deep Learning for Coders](https://course.fast.ai/) oppure uno dei programmi sviluppati da [DeepLearning.AI](https://www.deeplearning.ai/)
 * Non richiede conoscenze pregresse di [PyTorch](https://pytorch.org/) o [TensorFlow](https://www.tensorflow.org/), nonostante sia gradita una conoscienza anche superficiale dell'uno o dell'altro
 
-Quando avrai completato questo corso, ti raccomandiamo di passare al [Natural Language Processing Specialization](https://www.coursera.org/specializations/natural-language-processing?utm_source=deeplearning-ai&utm_medium=institutions&utm_campaign=20211011-nlp-2-hugging_face-page-nlp-refresh) di DeepLearning.AI, un corso che copre un ampio spettro di modelli tradizionali di NLP che vale davvero la pena di conoscere, come Naive Bayes e Memoria a breve termine a lungo termine (LSTM)!
+Quando avrai completato questo corso, ti raccomandiamo di passare al [Natural Language Processing Specialization](https://www.coursera.org/specializations/natural-language-processing?utm_source=deeplearning-ai&utm_medium=institutions&utm_campaign=20211011-nlp-2-hugging_face-page-nlp-refresh) di DeepLearning.AI, un corso che copre un ampio spettro di modelli tradizionali di NLP che vale davvero la pena di conoscere, come Naive Bayes e Memoria a breve termine a lungo termine (*LSTM*)!
 
 ## Chi siamo?
 
@@ -46,7 +46,7 @@ A proposito degli autori:
 
 **Leandro von Werra**  is a machine learning engineer in the open-source team at Hugging Face and also a co-author of the an upcoming [O’Reilly book on Transformers](https://www.oreilly.com/library/view/natural-language-processing/9781098103231/). He has several years of industry experience bringing NLP projects to production by working across the whole machine learning stack..
 
-Are you ready to roll? In this chapter, you will learn:
-* How to use the `pipeline()` function to solve NLP tasks such as text generation and classification
-* About the Transformer architecture
-* How to distinguish between encoder, decoder, and encoder-decoder architectures and use cases
+Sei pronto/a a iniziare? In questo capitolo, imparerai:
+* Ad utilizzare la funzione `pipeline()` per eseguire compiti di NLP come la generazione e classificazione di testi
+* L'architettura dei Transformer
+* Come fare la distinzione tra architetture encoder, decoder, e encoder-decoder, nonché casi d'uso

From 2776715c1601e5649c40cd9f733761a972e2bc43 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 5 Apr 2022 23:13:51 +0100
Subject: [PATCH 028/127] Ends the translation

---
 chapters/it/chapter1/1.mdx | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/chapters/it/chapter1/1.mdx b/chapters/it/chapter1/1.mdx
index e35191d08..087c890e2 100644
--- a/chapters/it/chapter1/1.mdx
+++ b/chapters/it/chapter1/1.mdx
@@ -32,21 +32,21 @@ Quando avrai completato questo corso, ti raccomandiamo di passare al [Natural La
 
 A proposito degli autori:
 
-**Matthew Carrigan** is a Machine Learning Engineer at Hugging Face. He lives in Dublin, Ireland and previously worked as an ML engineer at Parse.ly and before that as a post-doctoral researcher at Trinity College Dublin. He does not believe we're going to get to AGI by scaling existing architectures, but has high hopes for robot immortality regardless.
+**Matthew Carrigan** è Machine Learning Engineer da Hugging Face. Vive a Dublino, in Irlanda, ed in passato è stato ML engineer da Parse.ly, e prima ancora ricercatore postdottorale al Trinity College di Dublino. Nonostante non creda che otterremo l'Intelligenza artificiale forte semplicemente ingrandendo le architetture a nostra disposizione, spera comunque nell'immortalità cibernetica.
 
-**Lysandre Debut** is a Machine Learning Engineer at Hugging Face and has been working on the 🤗 Transformers library since the very early development stages. His aim is to make NLP accessible for everyone by developing tools with a very simple API.
+**Lysandre Debut** è Machine Learning Engineer da Hugging Face e ha lavorato ai Transformer 🤗 fin dalle primissime tappe del loro sviluppo. Il suo obiettivo è di rendere l'elaborazione del linguaggio naturale accessibile a tutti sviluppando strumenti con un semplice API.
 
-**Sylvain Gugger** is a Research Engineer at Hugging Face and one of the core maintainers of the 🤗 Transformers library. Previously he was a Research Scientist at fast.ai, and he co-wrote _[Deep Learning for Coders with fastai and PyTorch](https://learning.oreilly.com/library/view/deep-learning-for/9781492045519/)_ with Jeremy Howard. The main focus of his research is on making deep learning more accessible, by designing and improving techniques that allow models to train fast on limited resources.
+**Sylvain Gugger** è Research Engineer da Hugging Face e uno dei principali manutentori della libreria Transformers 🤗. In passato, è stato Research Scientist da fast.ai, e ha scritto [Deep Learning for Coders with fastai and PyTorch](https://learning.oreilly.com/library/view/deep-learning-for/9781492045519/) con Jeremy Howard. Il centro principale della sua ricerca consiste nel rendere il deep learning (*apprendimento profondo*) più accessibile, concependo e migliorando tecniche che permettano di allenare modelli velocemente con risorse limitate.
 
-**Merve Noyan** is a developer advocate at Hugging Face, working on developing tools and building content around them to democratize machine learning for everyone.
+**Merve Noyan** è developer advocate da Hugging Face, e lavora allo sviluppo di strumenti e alla creazione di contenuti ad essi legati per democratizzare l'accesso al deep learning.
 
-**Lucile Saulnier** is a machine learning engineer at Hugging Face, developing and supporting the use of open source tools. She is also actively involved in many research projects in the field of Natural Language Processing such as collaborative training and BigScience.
+**Lucile Saulnier** è machine learning engineer da Hugging Face, e sviluppa e supporta l'utilizzo di strumenti open source. È anche attivamente coinvolta in numerosi progetti di ricerca nell'ambito dell'elaborazione del linguaggio naturale, come ad esempio collaborative training e BigScience.
 
-**Lewis Tunstall**  is a machine learning engineer at Hugging Face, focused on developing open-source tools and making them accessible to the wider community. He is also a co-author of an upcoming [O’Reilly book on Transformers](https://www.oreilly.com/library/view/natural-language-processing/9781098103231/).
+**Lewis Tunstall** è machine learning engineer da Hugging Face che si specializza nello sviluppo di strumenti open-source e nella loro distribuzione alla comunità più ampia. È anche co-autore dell'imminente [O’Reilly book on Transformers](https://www.oreilly.com/library/view/natural-language-processing/9781098103231/).
 
-**Leandro von Werra**  is a machine learning engineer in the open-source team at Hugging Face and also a co-author of the an upcoming [O’Reilly book on Transformers](https://www.oreilly.com/library/view/natural-language-processing/9781098103231/). He has several years of industry experience bringing NLP projects to production by working across the whole machine learning stack..
+**Leandro von Werra** è machine learning engineer nel team open-source di Hugging Face, nonché co-autore dell'[O’Reilly book on Transformers](https://www.oreilly.com/library/view/natural-language-processing/9781098103231/). Ha tanti anni di esperienza nel portare progetti di NLP in produzione, lavorando a tutti i livelli dello stack di machine learning.
 
 Sei pronto/a a iniziare? In questo capitolo, imparerai:
 * Ad utilizzare la funzione `pipeline()` per eseguire compiti di NLP come la generazione e classificazione di testi
 * L'architettura dei Transformer
-* Come fare la distinzione tra architetture encoder, decoder, e encoder-decoder, nonché casi d'uso
+* Come distinguere tra architetture encoder, decoder ed encoder-decoder, e i relativi casi d'uso

From d77847092802642e4ac3fad78b56e20eb66ca59f Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 5 Apr 2022 23:14:43 +0100
Subject: [PATCH 029/127] Fixes a typo

---
 chapters/it/chapter1/1.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/chapter1/1.mdx b/chapters/it/chapter1/1.mdx
index 087c890e2..093681121 100644
--- a/chapters/it/chapter1/1.mdx
+++ b/chapters/it/chapter1/1.mdx
@@ -44,7 +44,7 @@ A proposito degli autori:
 
 **Lewis Tunstall** è machine learning engineer da Hugging Face che si specializza nello sviluppo di strumenti open-source e la loro distribuzione alla comunità più ampia. È anche co-autore dell'imminente [O’Reilly book on Transformers](https://www.oreilly.com/library/view/natural-language-processing/9781098103231/).
 
-**Leandro von Werra** è machine learning engineer nel team open-source di Hugging Face, nonché co-autore dell'[O’Reilly book on Transformers](https://www.oreilly.com/library/view/natural-language-processing/9781098103231/). Ha tanti anni di esperienza nel portare progetti di NLP in produzione, lavorando a tutti i livelli dello stack di machine learning.
+**Leandro von Werra** è machine learning engineer nel team open-source di Hugging Face, nonché co-autore dell'imminente [O’Reilly book on Transformers](https://www.oreilly.com/library/view/natural-language-processing/9781098103231/). Ha tanti anni di esperienza nel portare progetti di NLP in produzione, lavorando a tutti i livelli dello stack di machine learning.
 
 Sei pronto/a a iniziare? In questo capitolo, imparerai:
 * Ad utilizzare la funzione `pipeline()` per eseguire compiti di NLP come la generazione e classificazione di testi

From fd468a4e0f0e9b988fd80366aeca96f4c0427f7b Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 5 Apr 2022 23:16:20 +0100
Subject: [PATCH 030/127] Fixes a typo

---
 chapters/it/chapter1/1.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/chapter1/1.mdx b/chapters/it/chapter1/1.mdx
index 093681121..06b31a96f 100644
--- a/chapters/it/chapter1/1.mdx
+++ b/chapters/it/chapter1/1.mdx
@@ -18,7 +18,7 @@ Eccoti un breve riassunto dei contenuti del corso:
 
 - I capitoli da 1 a 4 forniscono un'introduzione ai concetti principali della libreria Transformer 🤗. Alla fine di questa parte del corso, conoscerai come funzionano i modelli Transformers e saprai come utilizzare un modello della [Hugging Face Hub](https://huggingface.co/models), ottimizzarlo in un dataset, e condividere i tuoi risultati nel Hub!
 - I capitoli da 5 a 8 insegnano le basi dei Dataset 🤗 e dei Tokeniser 🤗, per poi esplorare alcuni compiti classici di NLP. Alla fine di questa parte, saprai far fronte ai problemi di NLP più comuni in maniera autonoma.
-- I capitoli da 9 a 12 vanno oltre il NLP, ed esplorano come i modelli Transformer possano essere utilizzati per affrontare compiti di elaborazione vocale o visione artificiale. Strada facendo, imparerai a costruire e condividere dimostrazioni (*demo*) dei tuoi modelli, e ad ottimizzarli per la produzione. Alla fine di questa parte, sarai pronto ad utilizzare i Transformer 🤗 per qualsiasi problema di apprendimento automatico, o quasi!
+- I capitoli da 9 a 12 vanno oltre l'elaborazione del linguaggio naturale, ed esplorano come i modelli Transformer possano essere utilizzati per affrontare compiti di elaborazione vocale o visione artificiale. Strada facendo, imparerai a costruire e condividere dimostrazioni (*demo*) dei tuoi modelli, e ad ottimizzarli per la produzione. Alla fine di questa parte, sarai pronto ad utilizzare i Transformer 🤗 per qualsiasi problema di apprendimento automatico, o quasi!
 
 Questo corso:
 

From fe3ced9809278faeee35bea3b03a8c5469d916a2 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Wed, 6 Apr 2022 16:27:14 +0100
Subject: [PATCH 031/127] Update 1.mdx

---
 chapters/it/chapter1/1.mdx | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/chapters/it/chapter1/1.mdx b/chapters/it/chapter1/1.mdx
index 06b31a96f..9355fcb1d 100644
--- a/chapters/it/chapter1/1.mdx
+++ b/chapters/it/chapter1/1.mdx
@@ -4,7 +4,7 @@
 
 <Youtube id="00GKzGyWFEs" />
 
-Questo corso ti insegnerà a eseguire compiti di elaborazione del linguaggio naturale (*Natural Language Processing*, NLP) utilizzando le librerie dell'ecosistema di [Hugging Face](https://huggingface.co/): [Transformer 🤗](https://github.com/huggingface/transformers), [Dataset 🤗](https://github.com/huggingface/datasets), [Tokenizer 🤗](https://github.com/huggingface/tokenizers), e [Accelerate 🤗](https://github.com/huggingface/accelerate). Ti insegneremo anche ad usare il nostro [Hugging Face Hub](https://huggingface.co/models), che è completamente gratuito e senza pubblicità.
+Questo corso ti insegnerà a eseguire compiti di elaborazione del linguaggio naturale (*Natural Language Processing*, NLP) utilizzando le librerie dell'ecosistema di [Hugging Face](https://huggingface.co/): [🤗 Transformer](https://github.com/huggingface/transformers), [🤗 Dataset](https://github.com/huggingface/datasets), [🤗 Tokenizer](https://github.com/huggingface/tokenizers), e [🤗 Accelerate](https://github.com/huggingface/accelerate). Ti insegneremo anche ad usare il nostro [Hugging Face Hub](https://huggingface.co/models), che è completamente gratuito e senza pubblicità.
 
 
 ## Contenuti
@@ -16,9 +16,9 @@ Eccoti un breve riassunto dei contenuti del corso:
 <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/summary-dark.svg" alt="Brief overview of the chapters of the course.">
 </div>
 
-- I capitoli da 1 a 4 forniscono un'introduzione ai concetti principali della libreria Transformer 🤗. Alla fine di questa parte del corso, conoscerai come funzionano i modelli Transformers e saprai come utilizzare un modello della [Hugging Face Hub](https://huggingface.co/models), ottimizzarlo in un dataset, e condividere i tuoi risultati nel Hub!
-- I capitoli da 5 a 8 insegnano le basi dei Dataset 🤗 e dei Tokeniser 🤗, per poi esplorare alcuni compiti classici di NLP. Alla fine di questa parte, saprai far fronte ai problemi di NLP più comuni in maniera autonoma.
-- I capitoli da 9 a 12 vanno oltre l'elaborazione del linguaggio naturale, ed esplorano come i modelli Transformer possano essere utilizzati per affrontare compiti di elaborazione vocale o visione artificiale. Strada facendo, imparerai a costruire e condividere dimostrazioni (*demo*) dei tuoi modelli, e ad ottimizzarli per la produzione. Alla fine di questa parte, sarai pronto ad utilizzare i Transformer 🤗 per qualsiasi problema di apprendimento automatico, o quasi!
+- I capitoli da 1 a 4 forniscono un'introduzione ai concetti principali della libreria 🤗 Transformers. Alla fine di questa parte del corso, conoscerai come funzionano i modelli Transformers e saprai come utilizzare un modello della [Hugging Face Hub](https://huggingface.co/models), ottimizzarlo in un dataset, e condividere i tuoi risultati nel Hub!
+- I capitoli da 5 a 8 insegnano le basi degli 🤗 Dataset e degli 🤗 Tokeniser, per poi esplorare alcuni compiti classici di NLP. Alla fine di questa parte, saprai far fronte ai problemi di NLP più comuni in maniera autonoma.
+- I capitoli da 9 a 12 vanno oltre l'elaborazione del linguaggio naturale, ed esplorano come i modelli Transformer possano essere utilizzati per affrontare compiti di elaborazione vocale o visione artificiale. Strada facendo, imparerai a costruire e condividere dimostrazioni (*demo*) dei tuoi modelli, e ad ottimizzarli per la produzione. Alla fine di questa parte, sarai pronto ad utilizzare gli 🤗 Transformer per qualsiasi problema di apprendimento automatico, o quasi!
 
 Questo corso:
 
@@ -34,9 +34,9 @@ A proposito degli autori:
 
 **Matthew Carrigan** è Machine Learning Engineer da Hugging Face. Vive a Dublino, in Irlanda, ed in passato è stato ML engineer da Parse.ly, e prima ancora ricercatore postdottorale al Trinity College di Dublin. Nonostante non creda che otterremo l'Intelligenza artificiale forte semplicemente ingrandendo le architetture a nostra disposizione, spera comunque nell'immortalità cibernetica.
 
-**Lysandre Debut** è Machine Learning Engineer da Hugging Face e ha lavorato ai Transformer 🤗 fin dalle primissime tappe del loro sviluppo. Il suo obiettivo è di rendere l'elaborazione del linguaggio naturale accessibile a tutti sviluppando strumenti con un semplice API.
+**Lysandre Debut** è Machine Learning Engineer da Hugging Face e ha lavorato agli 🤗 Transformer fin dalle primissime tappe del loro sviluppo. Il suo obiettivo è di rendere l'elaborazione del linguaggio naturale accessibile a tutti sviluppando strumenti con un semplice API.
 
-**Sylvain Gugger** è Research Engineer da Hugging Face e uno dei principali manutentori della libreria Transformers 🤗. In passato, è stato Research Scientist da fast.ai, e ha scritto [Deep Learning for Coders with fastai and PyTorch](https://learning.oreilly.com/library/view/deep-learning-for/9781492045519/) con Jeremy Howard. Il centro principale della sua ricerca consiste nel rendere il deep learning (*apprendimento profondo*) più accessibile, concependo e migliorando tecniche che permettano di allenare modelli velocemente con risorse limitate.
+**Sylvain Gugger** è Research Engineer da Hugging Face e uno dei principali manutentori della libreria 🤗 Transformers. In passato, è stato Research Scientist da fast.ai, e ha scritto [Deep Learning for Coders with fastai and PyTorch](https://learning.oreilly.com/library/view/deep-learning-for/9781492045519/) con Jeremy Howard. Il centro principale della sua ricerca consiste nel rendere il deep learning (*apprendimento profondo*) più accessibile, concependo e migliorando tecniche che permettano di allenare modelli velocemente con risorse limitate.
 
 **Merve Noyan** è developer advocate da Hugging Face, e lavora allo sviluppo di strumenti e alla creazione di contenuti ad essi legati per democratizzare l'accesso al deep learning.
 
@@ -44,7 +44,7 @@ A proposito degli autori:
 
 **Lewis Tunstall** è machine learning engineer da Hugging Face che si specializza nello sviluppo di strumenti open-source e la loro distribuzione alla comunità più ampia. È anche co-autore dell'imminente [O’Reilly book on Transformers](https://www.oreilly.com/library/view/natural-language-processing/9781098103231/).
 
-**Leandro von Werra** è machine learning engineer nel team open-source di Hugging Face, nonché co-autore dell'imminente [O’Reilly book on Transformers](https://www.oreilly.com/library/view/natural-language-processing/9781098103231/). Ha tanti anni di esperienza nel portare progetti di NLP in produzione, lavorando a tutti i livelli dello stack di machine learning.
+**Leandro von Werra** è machine learning engineer nel team open-source di Hugging Face, nonché co-autore dell'imminente [O’Reilly book on Transformers](https://www.oreilly.com/library/view/natural-language-processing/9781098103231/). Ha tanti anni di esperienza nel portare progetti di NLP in produzione, lavorando a tutti i livelli di esecuzione di machine learning.
 
 Sei pronto/a a iniziare? In questo capitolo, imparerai:
 * Ad utilizzare la funzione `pipeline()` per eseguire compiti di NLP come la generazione e classificazione di testi

From 16f8290ad4ef4b6085a2d9ebdfc7aa95591cc317 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Wed, 6 Apr 2022 16:27:50 +0100
Subject: [PATCH 032/127] Update 1.mdx

---
 chapters/it/chapter1/1.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/chapter1/1.mdx b/chapters/it/chapter1/1.mdx
index 9355fcb1d..eb8122003 100644
--- a/chapters/it/chapter1/1.mdx
+++ b/chapters/it/chapter1/1.mdx
@@ -4,7 +4,7 @@
 
 <Youtube id="00GKzGyWFEs" />
 
-Questo corso ti insegnerà a eseguire compiti di elaborazione del linguaggio naturale (*Natural Language Processing*, NLP) utilizzando le librerie dell'ecosistema di [Hugging Face](https://huggingface.co/): [🤗 Transformer](https://github.com/huggingface/transformers), [🤗 Dataset](https://github.com/huggingface/datasets), [🤗 Tokenizer](https://github.com/huggingface/tokenizers), e [🤗 Accelerate](https://github.com/huggingface/accelerate). Ti insegneremo anche ad usare il nostro [Hugging Face Hub](https://huggingface.co/models), che è completamente gratuito e senza pubblicità.
+Questo corso ti insegnerà a eseguire compiti di elaborazione del linguaggio naturale (*Natural Language Processing*, NLP) utilizzando le librerie dell'ecosistema di [Hugging Face](https://huggingface.co/): [🤗 Transformers](https://github.com/huggingface/transformers), [🤗 Datasets](https://github.com/huggingface/datasets), [🤗 Tokenizers](https://github.com/huggingface/tokenizers), e [🤗 Accelerates](https://github.com/huggingface/accelerate). Ti insegneremo anche ad usare il nostro [Hugging Face Hub](https://huggingface.co/models), che è completamente gratuito e senza pubblicità.
 
 
 ## Contenuti

From efeed9fd6ecbe657423bd470e82b3105ee6dce32 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Wed, 6 Apr 2022 17:39:48 +0100
Subject: [PATCH 033/127] fixes problems

---
 chapters/it/chapter1/1.mdx | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/chapters/it/chapter1/1.mdx b/chapters/it/chapter1/1.mdx
index eb8122003..3f16f50a8 100644
--- a/chapters/it/chapter1/1.mdx
+++ b/chapters/it/chapter1/1.mdx
@@ -4,7 +4,7 @@
 
 <Youtube id="00GKzGyWFEs" />
 
-Questo corso ti insegnerà a eseguire compiti di elaborazione del linguaggio naturale (*Natural Language Processing*, NLP) utilizzando le librerie dell'ecosistema di [Hugging Face](https://huggingface.co/): [🤗 Transformers](https://github.com/huggingface/transformers), [🤗 Datasets](https://github.com/huggingface/datasets), [🤗 Tokenizers](https://github.com/huggingface/tokenizers), e [🤗 Accelerates](https://github.com/huggingface/accelerate). Ti insegneremo anche ad usare il nostro [Hugging Face Hub](https://huggingface.co/models), che è completamente gratuito e senza pubblicità.
+Questo corso ti insegnerà a eseguire compiti di Natural Language Processing (NLP, *elaborazione del linguaggio naturale*) utilizzando le librerie dell'ecosistema di [Hugging Face](https://huggingface.co/): [🤗 Transformers](https://github.com/huggingface/transformers), [🤗 Datasets](https://github.com/huggingface/datasets), [🤗 Tokenizers](https://github.com/huggingface/tokenizers), e [🤗 Accelerates](https://github.com/huggingface/accelerate). Ti insegneremo anche ad usare il nostro [Hugging Face Hub](https://huggingface.co/models), che è completamente gratuito e senza pubblicità.
 
 
 ## Contenuti
@@ -16,17 +16,17 @@ Eccoti un breve riassunto dei contenuti del corso:
 <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/summary-dark.svg" alt="Brief overview of the chapters of the course.">
 </div>
 
-- I capitoli da 1 a 4 forniscono un'introduzione ai concetti principali della libreria 🤗 Transformers. Alla fine di questa parte del corso, conoscerai come funzionano i modelli Transformers e saprai come utilizzare un modello della [Hugging Face Hub](https://huggingface.co/models), ottimizzarlo in un dataset, e condividere i tuoi risultati nel Hub!
+- I capitoli da 1 a 4 forniscono un'introduzione ai concetti principali della libreria 🤗 Transformers. Alla fine di questa parte del corso, conoscerai come funzionano i modelli Transformers e saprai come utilizzare un modello della [Hugging Face Hub](https://huggingface.co/models), affinarlo in un dataset, e condividere i tuoi risultati nell'Hub!
 - I capitoli da 5 a 8 insegnano le basi degli 🤗 Dataset e degli 🤗 Tokeniser, per poi esplorare alcuni compiti classici di NLP. Alla fine di questa parte, saprai far fronte ai problemi di NLP più comuni in maniera autonoma.
-- I capitoli da 9 a 12 vanno oltre l'elaborazione del linguaggio naturale, ed esplorano come i modelli Transformer possano essere utilizzati per affrontare compiti di elaborazione vocale o visione artificiale. Strada facendo, imparerai a costruire e condividere dimostrazioni (*demo*) dei tuoi modelli, e ad ottimizzarli per la produzione. Alla fine di questa parte, sarai pronto ad utilizzare gli 🤗 Transformer per qualsiasi problema di apprendimento automatico, o quasi!
+- I capitoli da 9 a 12 vanno oltre il Natural Language Processing, ed esplorano come i modelli Transformer possano essere utilizzati per affrontare compiti di elaborazione vocale o visione artificiale. Strada facendo, imparerai a costruire e condividere demo (*dimostrazioni*) dei tuoi modelli, e ad ottimizzarli per la produzione. Alla fine di questa parte, sarai pronto ad utilizzare gli 🤗 Transformer per qualsiasi problema di machine learning (*apprendimento automatico*), o quasi!
 
 Questo corso:
 
 * Richiede una buona conoscenza di Python
-* Andrebbe seguito di preferenza a seguito di un corso introduttivo di *deep learning* (apprendimento profondo), come ad esempio [fast.ai's](https://www.fast.ai/) [Practical Deep Learning for Coders](https://course.fast.ai/) oppure uno dei programmi sviluppati da [DeepLearning.AI](https://www.deeplearning.ai/)
-* Non richiede conoscenze pregresse di [PyTorch](https://pytorch.org/) o [TensorFlow](https://www.tensorflow.org/), nonostante sia gradita una conoscienza anche superficiale dell'uno o dell'altro
+* Andrebbe seguito di preferenza a seguito di un corso introduttivo di deep learning (*apprendimento profondo*), come ad esempio [fast.ai's](https://www.fast.ai/) [Practical Deep Learning for Coders](https://course.fast.ai/) oppure uno dei programmi sviluppati da [DeepLearning.AI](https://www.deeplearning.ai/)
+* Non richiede conoscenze pregresse di [PyTorch](https://pytorch.org/) o [TensorFlow](https://www.tensorflow.org/), nonostante sia gradita una conoscenza anche superficiale dell'uno o dell'altro
 
-Quando avrai completato questo corso, ti raccomandiamo di passare al [Natural Language Processing Specialization](https://www.coursera.org/specializations/natural-language-processing?utm_source=deeplearning-ai&utm_medium=institutions&utm_campaign=20211011-nlp-2-hugging_face-page-nlp-refresh) di DeepLearning.AI, un corso che copre un ampio spettro di modelli tradizionali di NLP che vale davvero la pena di conoscere, come Naive Bayes e Memoria a breve termine a lungo termine (*LSTM*)!
+Quando avrai completato questo corso, ti raccomandiamo di passare al [Natural Language Processing Specialization](https://www.coursera.org/specializations/natural-language-processing?utm_source=deeplearning-ai&utm_medium=institutions&utm_campaign=20211011-nlp-2-hugging_face-page-nlp-refresh) di DeepLearning.AI, un corso che copre un ampio spettro di modelli tradizionali di NLP che vale davvero la pena di conoscere, come Naive Bayes e LSTM (*Memoria a breve termine a lungo termine*)!
 
 ## Chi siamo?
 
@@ -34,17 +34,17 @@ A proposito degli autori:
 
 **Matthew Carrigan** è Machine Learning Engineer da Hugging Face. Vive a Dublino, in Irlanda, ed in passato è stato ML engineer da Parse.ly, e prima ancora ricercatore postdottorale al Trinity College di Dublin. Nonostante non creda che otterremo l'Intelligenza artificiale forte semplicemente ingrandendo le architetture a nostra disposizione, spera comunque nell'immortalità cibernetica.
 
-**Lysandre Debut** è Machine Learning Engineer da Hugging Face e ha lavorato agli 🤗 Transformer fin dalle primissime tappe del loro sviluppo. Il suo obiettivo è di rendere l'elaborazione del linguaggio naturale accessibile a tutti sviluppando strumenti con un semplice API.
+**Lysandre Debut** è Machine Learning Engineer da Hugging Face e ha lavorato agli 🤗 Transformer fin dalle primissime tappe del loro sviluppo. Il suo obiettivo è di rendere il NLP accessibile a tutti sviluppando strumenti con un'API molto semplice.
 
 **Sylvain Gugger** è Research Engineer da Hugging Face e uno dei principali manutentori della libreria 🤗 Transformers. In passato, è stato Research Scientist da fast.ai, e ha scritto [Deep Learning for Coders with fastai and PyTorch](https://learning.oreilly.com/library/view/deep-learning-for/9781492045519/) con Jeremy Howard. Il centro principale della sua ricerca consiste nel rendere il deep learning (*apprendimento profondo*) più accessibile, concependo e migliorando tecniche che permettano di allenare modelli velocemente con risorse limitate.
 
 **Merve Noyan** è developer advocate da Hugging Face, e lavora allo sviluppo di strumenti e alla creazione di contenuti ad essi legati per democratizzare l'accesso al deep learning.
 
-**Lucile Saulnier** è machine learning engineer da Hugging Face, e sviluppa e supporta l'utilizzo di strumenti open source. È anche attivamente coinvolta in numerosi progetti di ricerca nell'ambito dell'elaborazione del linguaggio naturale, come ad esempio collaborative training e BigScience.
+**Lucile Saulnier** è machine learning engineer da Hugging Face, e sviluppa e supporta l'utilizzo di strumenti open source. È anche attivamente coinvolta in numerosi progetti di ricerca nell'ambito del NLP, come ad esempio collaborative training e BigScience.
 
 **Lewis Tunstall** è machine learning engineer da Hugging Face che si specializza nello sviluppo di strumenti open-source e la loro distribuzione alla comunità più ampia. È anche co-autore dell'imminente [O’Reilly book on Transformers](https://www.oreilly.com/library/view/natural-language-processing/9781098103231/).
 
-**Leandro von Werra** è machine learning engineer nel team open-source di Hugging Face, nonché co-autore dell'imminente [O’Reilly book on Transformers](https://www.oreilly.com/library/view/natural-language-processing/9781098103231/). Ha tanti anni di esperienza nel portare progetti di NLP in produzione, lavorando a tutti i livelli di esecuzione di machine learning.
+**Leandro von Werra** è machine learning engineer nel team open-source di Hugging Face, nonché co-autore dell'imminente [O’Reilly book on Transformers](https://www.oreilly.com/library/view/natural-language-processing/9781098103231/). Ha tanti anni di esperienza nel portare progetti di NLP in produzione, lavorando a tutti i livelli di esecuzione di compiti di machine learning.
 
 Sei pronto/a a iniziare? In questo capitolo, imparerai:
 * Ad utilizzare la funzione `pipeline()` per eseguire compiti di NLP come la generazione e classificazione di testi

From f3573af76a10999db2d95719efdab0ba619d5f68 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 7 Apr 2022 15:11:40 +0100
Subject: [PATCH 034/127] Final version

---
 chapters/it/chapter1/1.mdx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/chapters/it/chapter1/1.mdx b/chapters/it/chapter1/1.mdx
index 3f16f50a8..4fd68aa6a 100644
--- a/chapters/it/chapter1/1.mdx
+++ b/chapters/it/chapter1/1.mdx
@@ -4,7 +4,7 @@
 
 <Youtube id="00GKzGyWFEs" />
 
-Questo corso ti insegnerà a eseguire compiti di Natural Language Processing (NLP, *elaborazione del linguaggio naturale*) utilizzando le librerie dell'ecosistema di [Hugging Face](https://huggingface.co/): [🤗 Transformers](https://github.com/huggingface/transformers), [🤗 Datasets](https://github.com/huggingface/datasets), [🤗 Tokenizers](https://github.com/huggingface/tokenizers), e [🤗 Accelerates](https://github.com/huggingface/accelerate). Ti insegneremo anche ad usare il nostro [Hugging Face Hub](https://huggingface.co/models), che è completamente gratuito e senza pubblicità.
+Questo corso ti insegnerà a eseguire compiti di Natural Language Processing (NLP, *elaborazione del linguaggio naturale*) utilizzando le librerie dell'ecosistema di [Hugging Face](https://huggingface.co/): [🤗 Transformers](https://github.com/huggingface/transformers), [🤗 Datasets](https://github.com/huggingface/datasets), [🤗 Tokenizers](https://github.com/huggingface/tokenizers), e [🤗 Accelerate](https://github.com/huggingface/accelerate). Ti insegneremo anche ad usare il nostro [Hugging Face Hub](https://huggingface.co/models), che è completamente gratuito e senza pubblicità.
 
 
 ## Contenuti
@@ -16,14 +16,14 @@ Eccoti un breve riassunto dei contenuti del corso:
 <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/summary-dark.svg" alt="Brief overview of the chapters of the course.">
 </div>
 
-- I capitoli da 1 a 4 forniscono un'introduzione ai concetti principali della libreria 🤗 Transformers. Alla fine di questa parte del corso, conoscerai come funzionano i modelli Transformers e saprai come utilizzare un modello della [Hugging Face Hub](https://huggingface.co/models), affinarlo in un dataset, e condividere i tuoi risultati nell'Hub!
-- I capitoli da 5 a 8 insegnano le basi degli 🤗 Dataset e degli 🤗 Tokeniser, per poi esplorare alcuni compiti classici di NLP. Alla fine di questa parte, saprai far fronte ai problemi di NLP più comuni in maniera autonoma.
+- I capitoli da 1 a 4 forniscono un'introduzione ai concetti principali della libreria 🤗 Transformers. Alla fine di questa parte del corso, conoscerai come funzionano i modelli Transformers e saprai come utilizzare un modello dell'[Hugging Face Hub](https://huggingface.co/models), affinarlo su un dataset, e condividere i tuoi risultati nell'Hub!
+- I capitoli da 5 a 8 insegnano le basi degli 🤗 Dataset e degli 🤗 Tokenizer, per poi esplorare alcuni compiti classici di NLP. Alla fine di questa parte, saprai far fronte ai problemi di NLP più comuni in maniera autonoma.
 - I capitoli da 9 a 12 vanno oltre il Natural Language Processing, ed esplorano come i modelli Transformer possano essere utilizzati per affrontare compiti di elaborazione vocale o visione artificiale. Strada facendo, imparerai a costruire e condividere demo (*dimostrazioni*) dei tuoi modelli, e ad ottimizzarli per la produzione. Alla fine di questa parte, sarai pronto ad utilizzare gli 🤗 Transformer per qualsiasi problema di machine learning (*apprendimento automatico*), o quasi!
 
 Questo corso:
 
 * Richiede una buona conoscenza di Python
-* Andrebbe seguito di preferenza a seguito di un corso introduttivo di deep learning (*apprendimento profondo*), come ad esempio [fast.ai's](https://www.fast.ai/) [Practical Deep Learning for Coders](https://course.fast.ai/) oppure uno dei programmi sviluppati da [DeepLearning.AI](https://www.deeplearning.ai/)
+* Andrebbe seguito preferibilmente dopo un corso introduttivo di deep learning (*apprendimento profondo*), come ad esempio il [Practical Deep Learning for Coders](https://course.fast.ai/) di [fast.ai](https://www.fast.ai/), oppure uno dei programmi sviluppati da [DeepLearning.AI](https://www.deeplearning.ai/)
 * Non richiede conoscenze pregresse di [PyTorch](https://pytorch.org/) o [TensorFlow](https://www.tensorflow.org/), nonostante sia gradita una conoscenza anche superficiale dell'uno o dell'altro
 
 Quando avrai completato questo corso, ti raccomandiamo di passare al [Natural Language Processing Specialization](https://www.coursera.org/specializations/natural-language-processing?utm_source=deeplearning-ai&utm_medium=institutions&utm_campaign=20211011-nlp-2-hugging_face-page-nlp-refresh) di DeepLearning.AI, un corso che copre un ampio spettro di modelli tradizionali di NLP che vale davvero la pena di conoscere, come Naive Bayes e LSTM (*Memoria a breve termine a lungo termine*)!

From 92a4d272e4919b3c475ca948bd1f105bca76e6d6 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 7 Apr 2022 15:32:01 +0100
Subject: [PATCH 035/127] Final version

---
 chapters/it/chapter0/1.mdx | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/chapters/it/chapter0/1.mdx b/chapters/it/chapter0/1.mdx
index e8f7c5bf6..844edc4f1 100644
--- a/chapters/it/chapter0/1.mdx
+++ b/chapters/it/chapter0/1.mdx
@@ -1,18 +1,18 @@
 # Introduzione
 
-Benvenuto/a al corso di Hugging Face! In questo capitolo introduttivo, ti aiuteremo a configurare il tuo ambiente di lavoro. Se non hai ancora cominciato il corso, ti consigliamo di iniziare col dare un occhio al [Capitolo 1](/course/chapter1), per poi tornare qui a creare il tuo ambiente e lavorare al codice.
+Benvenuto/a al corso di Hugging Face! In questo capitolo introduttivo, ti aiuteremo a configurare il tuo ambiente di lavoro. Se non hai ancora cominciato il corso, ti consigliamo di dare prima un occhio al [Capitolo 1](/course/chapter1), per poi tornare qui a creare il tuo ambiente e cominciare a lavorare al codice.
 
-Tutte le librerie che useremo in questo corso sono disponibili come pacchetti Python, e qui ti mostreremo dapprima come configurare un ambiente Python e in seguito come installare le librerie di cui avrai bisogno.
+Tutte le librerie che useremo in questo corso sono disponibili come pacchetti Python. Qui ti mostreremo dapprima come configurare un ambiente Python e in seguito come installare le librerie di cui avrai bisogno.
 
 Copriremo due modi per configurare un ambiente di lavoro: usando un blocco note Colab, oppure un ambiente virtuale in Python. Sentiti libero/a di scegliere quello che ti sembra più adatto a te. Se sei un/a principiante, ti consigliamo vivamente di cominciare a lavorare con un blocco note Colab.
 
 Nota che non copriremo Windows. Se utilizzi Windows come sistema operativo, il nostro consiglio è di seguire il corso utilizzando un blocco note Colab. Se invece utilizzi Linux oppure macOS, puoi scegliere uno qualsiasi degli approcci descritti qui in seguito.
 
-Buona parte del corso richiede un account di Hugging Face. Ti consigliamo dunque di crearne uno al più presto: [Crea un account](https://huggingface.co/join).
+Buona parte del corso richiede un profilo di Hugging Face. Ti consigliamo dunque di crearne uno al più presto: [Crea un profilo](https://huggingface.co/join).
 
 ## Come usare un blocco note Colab di Google
 
-Il modo più semplice per configurare il tuo ambiente di lavoro è utilizzando Google Colab: una volta avviato un blocco note nel browser, puoi iniziare immediatamente a programmare! 
+Il modo più semplice di configurare il tuo ambiente di lavoro è utilizzare Google Colab: una volta avviato un blocco note nel browser, puoi iniziare immediatamente a programmare! 
 
 Se non conosci bene Colab, ti raccomandiamo di iniziare dalla seguente [introduzione](https://colab.research.google.com/notebooks/intro.ipynb). Colab permette di utilizzare accelerazioni hardware come GPU o TPU, ed è gratuito per i carichi di lavoro più piccoli.
 
@@ -22,13 +22,13 @@ Quando ti sentirai a tuo agio con Colab, crea un nuovo blocco note e inizia la c
 <img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter0/new_colab.png" alt="An empty colab notebook" width="80%"/>
 </div>
 
-Il passo successivo consiste nell'installare le librerie che utilizzerai in questo corso. Per l'installazione, useremo `pip`, ossia il gestore di pacchetti di Python. In Google Colab, puoi inizializzare i tuoi comandi di sistema facendone precedere il nome dal carattere `!`. La libreria Transformer di 🤗 verrà quindi installata come segue:
+Il passo successivo consiste nell'installare le librerie che utilizzerai in questo corso. Per l'installazione, useremo `pip`, ossia il gestore di pacchetti di Python. In Google Colab, puoi inizializzare i tuoi comandi di sistema facendone precedere il nome dal carattere `!`. La libreria 🤗 Transformers verrà quindi installata come segue:
 
 ```
 !pip install transformers
 ```
 
-Puoi assicurarti che il pacchetto sia stato installato correttamente importandolo nel tuo runtime in Python:
+Puoi assicurarti che il pacchetto sia stato installato correttamente importandolo in Python:
 
 ```
 import transformers
@@ -38,7 +38,7 @@ import transformers
 <img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter0/install.gif" alt="A gif showing the result of the two commands above: installation and import" width="80%"/>
 </div>
 
-Quest'operazione installa una versione molto leggera dei Transformer di 🤗 che non importa nessun quadro strutturale (*framework*) di apprendimento automatico (*machine learning*), come ad esempio PyTorch o TensorFlow. Dato che useremo numerose caratteristiche (*features*) della libreria, ti raccomandiamo l'installazione della versione per sviluppatori. Questa contiene praticamente tutte le dipendenze possibili e immaginabili:
+Quest'operazione installa una versione molto leggera degli 🤗 Transformers che non importa nessun framework (*quadro strutturale*) di machine learning (*apprendimento automatico*), come ad esempio PyTorch o TensorFlow. Dato che useremo numerose features (*caratteristiche*) della libreria, ti raccomandiamo l'installazione della versione per sviluppatori. Questa contiene praticamente tutte le dipendenze possibili e immaginabili:
 
 ```
 !pip install transformers[sentencepiece]
@@ -52,15 +52,15 @@ Se preferisci utilizzare un ambiente virtuale in Python, il primo passo consiste
 
 Quando avrai installato Python, dovresti riuscire a eseguire qualsiasi comando in Python sul terminale. Prima di procedere ai passi successivi, prova a eseguire il seguente comando per assicurarti che Python sia installato correttamente: `python --version`. Il comando dovrebbe stampare il nome della versione di Python installata nella tua macchina.
 
-Quando esegui un comando in Python dal terminale, come ad esempio `python --version`, ti consigliamo di considerare il programma che esegue il tuo comando l'installazione "principale" di Python del tuo sistema. La nostra raccomandazione è di tenere quest'installazione principale libera da pacchetti di ogni tipo, e di usarla per creare ambienti diversi per ogni applicazione alla quale lavorerai. In questo modo, ogni applicazione avrà le proprie dipendenze e i propri pacchetti, e non dovrai preoccuparti di eventuali problemi di compatibilità con altre applicazioni.
+Quando esegui un comando in Python dal terminale, come ad esempio `python --version`, ti consigliamo di considerare il programma che esegue il tuo comando come l'installazione "principale" di Python del tuo sistema. La nostra raccomandazione è di tenere quest'installazione principale libera da pacchetti di ogni tipo, e di usarla per creare ambienti diversi per ogni applicazione alla quale lavorerai. In questo modo, ogni applicazione avrà le proprie dipendenze e i propri pacchetti, e non dovrai preoccuparti di eventuali problemi di compatibilità con altre applicazioni.
 
-In Python, quest'operazione si effettua utilizzando gli [*ambienti virtuali*](https://docs.python.org/3/tutorial/venv.html) (*virtual environments*). Questi ultimi sono  degli alberi di directory autonomi che contengono installazioni di Python diverse, ossia particolari versioni di Python unite a tutti i pacchetti richiesti da una certa applicazione. La creazione di ambienti virtuali di questo tipo si può attuare a mezzo di strumenti diversi, anche se qui useremo esclusivamente il pacchetto ufficiale di Python, [`venv`](https://docs.python.org/3/library/venv.html#module-venv).
+In Python, quest'operazione si effettua utilizzando i [virtual environments](https://docs.python.org/3/tutorial/venv.html) (*ambienti virtuali*). Questi ultimi sono degli alberi di directory autonomi che contengono installazioni di Python diverse, ossia particolari versioni di Python unite a tutti i pacchetti richiesti da una certa applicazione. La creazione di ambienti virtuali di questo tipo si può attuare a mezzo di strumenti diversi, anche se qui useremo esclusivamente il pacchetto ufficiale di Python, [`venv`](https://docs.python.org/3/library/venv.html#module-venv).
 
-Innanzitutto, crea la cartella che ospiterà l'applicazione in questione, come ad esempio una cartella di nome *corso-transformers* alla radice della tua home directory:
+Innanzitutto, crea la cartella che ospiterà l'applicazione in questione, come ad esempio una cartella di nome *transformer-course* alla radice della tua home directory:
 
 ```
-mkdir ~/corso-transformers
-cd ~/corso-transformers
+mkdir ~/transformer-course
+cd ~/transformer-course
 ```
 
 All'interno di questa cartella, crea un ambiente virtuale utilizzando il modulo `venv` di Python:
@@ -82,10 +82,10 @@ ls -a
 Puoi entrare e uscire dall'ambiente virtuale utilizzando gli script `activate` e `deactivate`:
 
 ```
-# Attiva l'ambiente virtuale
+# Activate the virtual environment
 source .env/bin/activate
 
-# Disattiva l'ambiente virtuale
+# Deactivate the virtual environment
 source .env/bin/deactivate
 ```
 
@@ -96,12 +96,12 @@ which python
 ```
 
 ```out
-/home/<user>/corso-transformers/.env/bin/python
+/home/<user>/transformer-course/.env/bin/python
 ```
 
 ### Installazione dipendenze
 
-Come già menzionato nella sezione su Google Colab, il passo successivo consiste nell'installazione dei pacchetti richiesti dal corso. Ancora una volta, ti chiediamo di installare la versione per sviluppatori dei Transformer di 🤗 utilizzando il gestore di pacchetti `pip`:
+Come già menzionato nella sezione su Google Colab, il passo successivo consiste nell'installazione dei pacchetti richiesti dal corso. Ancora una volta, ti chiediamo di installare la versione per sviluppatori degli 🤗 Transformers utilizzando il gestore di pacchetti `pip`:
 
 ```
 pip install "transformers[sentencepiece]"

From a6c853b9915b02a04f8f204b52f874caa5f14dee Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 7 Apr 2022 19:47:32 +0100
Subject: [PATCH 036/127] Delete 1.mdx

---
 1.mdx | 20 --------------------
 1 file changed, 20 deletions(-)
 delete mode 100644 1.mdx

diff --git a/1.mdx b/1.mdx
deleted file mode 100644
index 9ab184b82..000000000
--- a/1.mdx
+++ /dev/null
@@ -1,20 +0,0 @@
-# Introduction
-
-As you saw in [Chapter 1](/course/chapter1), Transformer models are usually very large. With millions to tens of *billions* of parameters, training and deploying these models is a complicated undertaking. Furthermore, with new models being released on a near-daily basis and each having its own implementation, trying them all out is no easy task.
-
-The 🤗 Transformers library was created to solve this problem. Its goal is to provide a single API through which any Transformer model can be loaded, trained, and saved. The library's main features are:
-
-- **Ease of use**: Downloading, loading, and using a state-of-the-art NLP model for inference can be done in just two lines of code.
-- **Flexibility**: At their core, all models are simple PyTorch `nn.Module` or TensorFlow `tf.keras.Model` classes and can be handled like any other models in their respective machine learning (ML) frameworks.
-- **Simplicity**: Hardly any abstractions are made across the library. The "All in one file" is a core concept: a model's forward pass is entirely defined in a single file, so that the code itself is understandable and hackable.
-
-This last feature makes 🤗 Transformers quite different from other ML libraries. The models are not built on modules 
-that are shared across files; instead, each model has its own layers. In addition to making the models more approachable and understandable, this allows you to easily experiment on one model without affecting others.
-
-This chapter will begin with an end-to-end example where we use a model and a tokenizer together to replicate the `pipeline()` function introduced in [Chapter 1](/course/chapter1). Next, we'll discuss the model API: we'll dive into the model and configuration classes, and show you how to load a model and how it processes numerical inputs to output predictions. 
-
-Then we'll look at the tokenizer API, which is the other main component of the `pipeline()` function. Tokenizers take care of the first and last processing steps, handling the conversion from text to numerical inputs for the neural network, and the conversion back to text when it is needed. Finally, we'll show you how to handle sending multiple sentences through a model in a prepared batch, then wrap it all up with a closer look at the high-level `tokenizer()` function.
-
-<Tip>
-⚠️ In order to benefit from all features available with the Model Hub and 🤗 Transformers, we recommend <a href="https://huggingface.co/join">creating an account</a>.
-</Tip>
\ No newline at end of file

From 3fb98f877eb66d60be0fb130f96f18925022837b Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 7 Apr 2022 19:47:51 +0100
Subject: [PATCH 037/127] Delete 2.mdx

---
 2.mdx | 353 ----------------------------------------------------------
 1 file changed, 353 deletions(-)
 delete mode 100644 2.mdx

diff --git a/2.mdx b/2.mdx
deleted file mode 100644
index a7715efc7..000000000
--- a/2.mdx
+++ /dev/null
@@ -1,353 +0,0 @@
-<FrameworkSwitchCourse {fw} />
-
-# Behind the pipeline
-
-{#if fw === 'pt'}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section2_pt.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section2_pt.ipynb"},
-]} />
-
-{:else}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section2_tf.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section2_tf.ipynb"},
-]} />
-
-{/if}
-
-<Tip>
-This is the first section where the content is slightly different depending on whether you use PyTorch and TensorFlow. Toogle the switch on top of the title to select the platform you prefer!
-</Tip>
-
-{#if fw === 'pt'}
-<Youtube id="1pedAIvTWXk"/>
-{:else}
-<Youtube id="wVN12smEvqg"/>
-{/if}
-
-Let's start with a complete example, taking a look at what happened behind the scenes when we executed the following code in [Chapter 1](/course/chapter1):
-
-```python
-from transformers import pipeline
-
-classifier = pipeline("sentiment-analysis")
-classifier(
-    [
-        "I've been waiting for a HuggingFace course my whole life.",
-        "I hate this so much!",
-    ]
-)
-```
-
-and obtained:
-
-```python out
-[{'label': 'POSITIVE', 'score': 0.9598047137260437},
- {'label': 'NEGATIVE', 'score': 0.9994558095932007}]
-```
-
-As we saw in [Chapter 1](/course/chapter1), this pipeline groups together three steps: preprocessing, passing the inputs through the model, and postprocessing:
-
-<div class="flex justify-center">
-<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/full_nlp_pipeline.svg" alt="The full NLP pipeline: tokenization of text, conversion to IDs, and inference through the Transformer model and the model head."/>
-<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/full_nlp_pipeline-dark.svg" alt="The full NLP pipeline: tokenization of text, conversion to IDs, and inference through the Transformer model and the model head."/>
-</div>
-
-Let's quickly go over each of these.
-
-## Preprocessing with a tokenizer
-
-Like other neural networks, Transformer models can't process raw text directly, so the first step of our pipeline is to convert the text inputs into numbers that the model can make sense of. To do this we use a *tokenizer*, which will be responsible for:
-
-- Splitting the input into words, subwords, or symbols (like punctuation) that are called *tokens*
-- Mapping each token to an integer
-- Adding additional inputs that may be useful to the model
-
-All this preprocessing needs to be done in exactly the same way as when the model was pretrained, so we first need to download that information from the [Model Hub](https://huggingface.co/models). To do this, we use the `AutoTokenizer` class and its `from_pretrained()` method. Using the checkpoint name of our model, it will automatically fetch the data associated with the model's tokenizer and cache it (so it's only downloaded the first time you run the code below).
-
-Since the default checkpoint of the `sentiment-analysis` pipeline is `distilbert-base-uncased-finetuned-sst-2-english` (you can see its model card [here](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)), we run the following:
-
-```python
-from transformers import AutoTokenizer
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-```
-
-Once we have the tokenizer, we can directly pass our sentences to it and we'll get back a dictionary that's ready to feed to our model! The only thing left to do is to convert the list of input IDs to tensors.
-
-You can use 🤗 Transformers without having to worry about which ML framework is used as a backend; it might be PyTorch or TensorFlow, or Flax for some models. However, Transformer models only accept *tensors* as input. If this is your first time hearing about tensors, you can think of them as NumPy arrays instead. A NumPy array can be a scalar (0D), a vector (1D), a matrix (2D), or have more dimensions. It's effectively a tensor; other ML frameworks' tensors behave similarly, and are usually as simple to instantiate as NumPy arrays.
-
-To specify the type of tensors we want to get back (PyTorch, TensorFlow, or plain NumPy), we use the `return_tensors` argument:
-
-{#if fw === 'pt'}
-```python
-raw_inputs = [
-    "I've been waiting for a HuggingFace course my whole life.",
-    "I hate this so much!",
-]
-inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="pt")
-print(inputs)
-```
-{:else}
-```python
-raw_inputs = [
-    "I've been waiting for a HuggingFace course my whole life.",
-    "I hate this so much!",
-]
-inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="tf")
-print(inputs)
-```
-{/if}
-
-Don't worry about padding and truncation just yet; we'll explain those later. The main things to remember here are that you can pass one sentence or a list of sentences, as well as specifying the type of tensors you want to get back (if no type is passed, you will get a list of lists as a result).
-
-{#if fw === 'pt'}
-
-Here's what the results look like as PyTorch tensors:
-
-```python out
-{
-    'input_ids': tensor([
-        [  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172, 2607,  2026,  2878,  2166,  1012,   102],
-        [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,     0,     0,     0,     0,     0,     0]
-    ]), 
-    'attention_mask': tensor([
-        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
-        [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-    ])
-}
-```
-{:else}
-
-Here's what the results look like as TensorFlow tensors:
-
-```python out
-{
-    'input_ids': <tf.Tensor: shape=(2, 16), dtype=int32, numpy=
-        array([
-            [  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,  2607,  2026,  2878,  2166,  1012,   102],
-            [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,     0,     0,     0,     0,     0,     0]
-        ], dtype=int32)>, 
-    'attention_mask': <tf.Tensor: shape=(2, 16), dtype=int32, numpy=
-        array([
-            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-        ], dtype=int32)>
-}
-```
-{/if}
-
-The output itself is a dictionary containing two keys, `input_ids` and `attention_mask`. `input_ids` contains two rows of integers (one for each sentence) that are the unique identifiers of the tokens in each sentence. We'll explain what the `attention_mask` is later in this chapter. 
-
-## Going through the model
-
-{#if fw === 'pt'}
-We can download our pretrained model the same way we did with our tokenizer. 🤗 Transformers provides an `AutoModel` class which also has a `from_pretrained()` method:
-
-```python
-from transformers import AutoModel
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-model = AutoModel.from_pretrained(checkpoint)
-```
-{:else}
-We can download our pretrained model the same way we did with our tokenizer. 🤗 Transformers provides an `TFAutoModel` class which also has a `from_pretrained` method:
-
-```python
-from transformers import TFAutoModel
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-model = TFAutoModel.from_pretrained(checkpoint)
-```
-{/if}
-
-In this code snippet, we have downloaded the same checkpoint we used in our pipeline before (it should actually have been cached already) and instantiated a model with it.
-
-This architecture contains only the base Transformer module: given some inputs, it outputs what we'll call *hidden states*, also known as *features*. For each model input, we'll retrieve a high-dimensional vector representing the **contextual understanding of that input by the Transformer model**.
-
-If this doesn't make sense, don't worry about it. We'll explain it all later.
-
-While these hidden states can be useful on their own, they're usually inputs to another part of the model, known as the *head*. In [Chapter 1](/course/chapter1), the different tasks could have been performed with the same architecture, but each of these tasks will have a different head associated with it.
-
-### A high-dimensional vector?
-
-The vector output by the Transformer module is usually large. It generally has three dimensions:
-
-- **Batch size**: The number of sequences processed at a time (2 in our example).
-- **Sequence length**: The length of the numerical representation of the sequence (16 in our example).
-- **Hidden size**: The vector dimension of each model input.
-
-It is said to be "high dimensional" because of the last value. The hidden size can be very large (768 is common for smaller models, and in larger models this can reach 3072 or more).
-
-We can see this if we feed the inputs we preprocessed to our model:
-
-{#if fw === 'pt'}
-```python
-outputs = model(**inputs)
-print(outputs.last_hidden_state.shape)
-```
-
-```python out
-torch.Size([2, 16, 768])
-```
-{:else}
-```py
-outputs = model(inputs)
-print(outputs.last_hidden_state.shape)
-```
-
-```python out
-(2, 16, 768)
-```
-{/if}
-
-Note that the outputs of 🤗 Transformers models behave like `namedtuple`s or dictionaries. You can access the elements by attributes (like we did) or by key (`outputs["last_hidden_state"]`), or even by index if you know exactly where the thing you are looking for is (`outputs[0]`).
-
-### Model heads: Making sense out of numbers
-
-The model heads take the high-dimensional vector of hidden states as input and project them onto a different dimension. They are usually composed of one or a few linear layers:
-
-<div class="flex justify-center">
-<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/transformer_and_head.svg" alt="A Transformer network alongside its head."/>
-<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/transformer_and_head-dark.svg" alt="A Transformer network alongside its head."/>
-</div>
-
-The output of the Transformer model is sent directly to the model head to be processed.
-
-In this diagram, the model is represented by its embeddings layer and the subsequent layers. The embeddings layer converts each input ID in the tokenized input into a vector that represents the associated token. The subsequent layers manipulate those vectors using the attention mechanism to produce the final representation of the sentences.
-
-There are many different architectures available in 🤗 Transformers, with each one designed around tackling a specific task. Here is a non-exhaustive list:
-
-- `*Model` (retrieve the hidden states)
-- `*ForCausalLM`
-- `*ForMaskedLM`
-- `*ForMultipleChoice`
-- `*ForQuestionAnswering`
-- `*ForSequenceClassification`
-- `*ForTokenClassification`
-- and others 🤗
-
-{#if fw === 'pt'}
-For our example, we will need a model with a sequence classification head (to be able to classify the sentences as positive or negative). So, we won't actually use the `AutoModel` class, but `AutoModelForSequenceClassification`:
-
-```python
-from transformers import AutoModelForSequenceClassification
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
-outputs = model(**inputs)
-```
-{:else}
-For our example, we will need a model with a sequence classification head (to be able to classify the sentences as positive or negative). So, we won't actually use the `TFAutoModel` class, but `TFAutoModelForSequenceClassification`:
-
-```python
-from transformers import TFAutoModelForSequenceClassification
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
-outputs = model(inputs)
-```
-{/if}
-
-Now if we look at the shape of our inputs, the dimensionality will be much lower: the model head takes as input the high-dimensional vectors we saw before, and outputs vectors containing two values (one per label):
-
-```python
-print(outputs.logits.shape)
-```
-
-{#if fw === 'pt'}
-```python out
-torch.Size([2, 2])
-```
-{:else}
-```python out
-(2, 2)
-```
-{/if}
-
-Since we have just two sentences and two labels, the result we get from our model is of shape 2 x 2.
-
-## Postprocessing the output
-
-The values we get as output from our model don't necessarily make sense by themselves. Let's take a look:
-
-```python
-print(outputs.logits)
-```
-
-{#if fw === 'pt'}
-```python out
-tensor([[-1.5607,  1.6123],
-        [ 4.1692, -3.3464]], grad_fn=<AddmmBackward>)
-```
-{:else}
-```python out
-<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
-    array([[-1.5606991,  1.6122842],
-           [ 4.169231 , -3.3464472]], dtype=float32)>
-```
-{/if}
-
-Our model predicted `[-1.5607, 1.6123]` for the first sentence and `[ 4.1692, -3.3464]` for the second one. Those are not probabilities but *logits*, the raw, unnormalized scores outputted by the last layer of the model. To be converted to probabilities, they need to go through a [SoftMax](https://en.wikipedia.org/wiki/Softmax_function) layer (all 🤗 Transformers models output the logits, as the loss function for training will generally fuse the last activation function, such as SoftMax, with the actual loss function, such as cross entropy):
-
-{#if fw === 'pt'}
-```py
-import torch
-
-predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
-print(predictions)
-```
-{:else}
-```py
-import tensorflow as tf
-
-predictions = tf.math.softmax(outputs.logits, axis=-1)
-print(predictions)
-```
-{/if}
-
-{#if fw === 'pt'}
-```python out
-tensor([[4.0195e-02, 9.5980e-01],
-        [9.9946e-01, 5.4418e-04]], grad_fn=<SoftmaxBackward>)
-```
-{:else}
-```python out
-tf.Tensor(
-[[4.01951671e-02 9.59804833e-01]
- [9.9945587e-01 5.4418424e-04]], shape=(2, 2), dtype=float32)
-```
-{/if}
-
-Now we can see that the model predicted `[0.0402, 0.9598]` for the first sentence and `[0.9995,  0.0005]` for the second one. These are recognizable probability scores.
-
-To get the labels corresponding to each position, we can inspect the `id2label` attribute of the model config (more on this in the next section):
-
-```python
-model.config.id2label
-```
-
-```python out
-{0: 'NEGATIVE', 1: 'POSITIVE'}
-```
-
-Now we can conclude that the model predicted the following:
- 
-- First sentence: NEGATIVE: 0.0402, POSITIVE: 0.9598
-- Second sentence: NEGATIVE: 0.9995, POSITIVE: 0.0005
-
-We have successfully reproduced the three steps of the pipeline: preprocessing with tokenizers, passing the inputs through the model, and postprocessing! Now let's take some time to dive deeper into each of those steps.
-
-<Tip>
-
-✏️ **Try it out!** Choose two (or more) texts of your own and run them through the `sentiment-analysis` pipeline. Then replicate the steps you saw here yourself and check that you obtain the same results!
-
-</Tip>

From 817c1539aa4b9ca4dc58466febde47e521131651 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 7 Apr 2022 19:48:01 +0100
Subject: [PATCH 038/127] Delete 3.mdx

---
 3.mdx | 228 ----------------------------------------------------------
 1 file changed, 228 deletions(-)
 delete mode 100644 3.mdx

diff --git a/3.mdx b/3.mdx
deleted file mode 100644
index c9100c42c..000000000
--- a/3.mdx
+++ /dev/null
@@ -1,228 +0,0 @@
-<FrameworkSwitchCourse {fw} />
-
-# Models
-
-{#if fw === 'pt'}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section3_pt.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section3_pt.ipynb"},
-]} />
-
-{:else}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section3_tf.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section3_tf.ipynb"},
-]} />
-
-{/if}
-
-{#if fw === 'pt'}
-<Youtube id="AhChOFRegn4"/>
-{:else}
-<Youtube id="d3JVgghSOew"/>
-{/if}
-
-{#if fw === 'pt'}
-In this section we'll take a closer look at creating and using a model. We'll use the `AutoModel` class, which is handy when you want to instantiate any model from a checkpoint.
-
-The `AutoModel` class and all of its relatives are actually simple wrappers over the wide variety of models available in the library. It's a clever wrapper as it can automatically guess the appropriate model architecture for your checkpoint, and then instantiates a model with this architecture.
-
-{:else}
-In this section we'll take a closer look at creating and using a model. We'll use the `TFAutoModel` class, which is handy when you want to instantiate any model from a checkpoint.
-
-The `TFAutoModel` class and all of its relatives are actually simple wrappers over the wide variety of models available in the library. It's a clever wrapper as it can automatically guess the appropriate model architecture for your checkpoint, and then instantiates a model with this architecture.
-
-{/if}
-
-However, if you know the type of model you want to use, you can use the class that defines its architecture directly. Let's take a look at how this works with a BERT model.
-
-## Creating a Transformer
-
-The first thing we'll need to do to initialize a BERT model is load a configuration object:
-
-{#if fw === 'pt'}
-```py
-from transformers import BertConfig, BertModel
-
-# Building the config
-config = BertConfig()
-
-# Building the model from the config
-model = BertModel(config)
-```
-{:else}
-```py
-from transformers import BertConfig, TFBertModel
-
-# Building the config
-config = BertConfig()
-
-# Building the model from the config
-model = TFBertModel(config)
-```
-{/if}
-
-The configuration contains many attributes that are used to build the model:
-
-```py
-print(config)
-```
-
-```python out
-BertConfig {
-  [...]
-  "hidden_size": 768,
-  "intermediate_size": 3072,
-  "max_position_embeddings": 512,
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  [...]
-}
-```
-
-While you haven't seen what all of these attributes do yet, you should recognize some of them: the `hidden_size` attribute defines the size of the `hidden_states` vector, and `num_hidden_layers` defines the number of layers the Transformer model has.
-
-### Different loading methods
-
-Creating a model from the default configuration initializes it with random values:
-
-{#if fw === 'pt'}
-```py
-from transformers import BertConfig, BertModel
-
-config = BertConfig()
-model = BertModel(config)
-
-# Model is randomly initialized!
-```
-{:else}
-```py
-from transformers import BertConfig, TFBertModel
-
-config = BertConfig()
-model = TFBertModel(config)
-
-# Model is randomly initialized!
-```
-{/if}
-
-The model can be used in this state, but it will output gibberish; it needs to be trained first. We could train the model from scratch on the task at hand, but as you saw in [Chapter 1](/course/chapter1), this would require a long time and a lot of data, and it would have a non-negligible environmental impact. To avoid unnecessary and duplicated effort, it's imperative to be able to share and reuse models that have already been trained.
-
-Loading a Transformer model that is already trained is simple — we can do this using the `from_pretrained()` method:
-
-{#if fw === 'pt'}
-```py
-from transformers import BertModel
-
-model = BertModel.from_pretrained("bert-base-cased")
-```
-
-As you saw earlier, we could replace `BertModel` with the equivalent `AutoModel` class. We'll do this from now on as this produces checkpoint-agnostic code; if your code works for one checkpoint, it should work seamlessly with another. This applies even if the architecture is different, as long as the checkpoint was trained for a similar task (for example, a sentiment analysis task).
-
-{:else}
-```py
-from transformers import TFBertModel
-
-model = TFBertModel.from_pretrained("bert-base-cased")
-```
-
-As you saw earlier, we could replace `TFBertModel` with the equivalent `TFAutoModel` class. We'll do this from now on as this produces checkpoint-agnostic code; if your code works for one checkpoint, it should work seamlessly with another. This applies even if the architecture is different, as long as the checkpoint was trained for a similar task (for example, a sentiment analysis task).
-
-{/if}
-
-In the code sample above we didn't use `BertConfig`, and instead loaded a pretrained model via the `bert-base-cased` identifier. This is a model checkpoint that was trained by the authors of BERT themselves; you can find more details about it in its [model card](https://huggingface.co/bert-base-cased).
-
-This model is now initialized with all the weights of the checkpoint. It can be used directly for inference on the tasks it was trained on, and it can also be fine-tuned on a new task. By training with pretrained weights rather than from scratch, we can quickly achieve good results.
-
-The weights have been downloaded and cached (so future calls to the `from_pretrained()` method won't re-download them) in the cache folder, which defaults to *~/.cache/huggingface/transformers*. You can customize your cache folder by setting the `HF_HOME` environment variable.
-
-The identifier used to load the model can be the identifier of any model on the Model Hub, as long as it is compatible with the BERT architecture. The entire list of available BERT checkpoints can be found [here](https://huggingface.co/models?filter=bert).
-
-### Saving methods
-
-Saving a model is as easy as loading one — we use the `save_pretrained()` method, which is analogous to the `from_pretrained()` method:
-
-```py
-model.save_pretrained("directory_on_my_computer")
-```
-
-This saves two files to your disk:
-
-{#if fw === 'pt'}
-```
-ls directory_on_my_computer
-
-config.json pytorch_model.bin
-```
-{:else}
-```
-ls directory_on_my_computer
-
-config.json tf_model.h5
-```
-{/if}
-
-If you take a look at the *config.json* file, you'll recognize the attributes necessary to build the model architecture. This file also contains some metadata, such as where the checkpoint originated and what 🤗 Transformers version you were using when you last saved the checkpoint.
-
-{#if fw === 'pt'}
-The *pytorch_model.bin* file is known as the *state dictionary*; it contains all your model's weights. The two files go hand in hand; the configuration is necessary to know your model's architecture, while the model weights are your model's parameters.
-
-{:else}
-The *tf_model.h5* file is known as the *state dictionary*; it contains all your model's weights. The two files go hand in hand; the configuration is necessary to know your model's architecture, while the model weights are your model's parameters.
-
-{/if}
-
-## Using a Transformer model for inference
-
-Now that you know how to load and save a model, let's try using it to make some predictions. Transformer models can only process numbers — numbers that the tokenizer generates. But before we discuss tokenizers, let's explore what inputs the model accepts.
-
-Tokenizers can take care of casting the inputs to the appropriate framework's tensors, but to help you understand what's going on, we'll take a quick look at what must be done before sending the inputs to the model.
-
-Let's say we have a couple of sequences:
-
-```py
-sequences = ["Hello!", "Cool.", "Nice!"]
-```
-
-The tokenizer converts these to vocabulary indices which are typically called *input IDs*. Each sequence is now a list of numbers! The resulting output is:
-
-```py no-format
-encoded_sequences = [
-    [101, 7592, 999, 102],
-    [101, 4658, 1012, 102],
-    [101, 3835, 999, 102],
-]
-```
-
-This is a list of encoded sequences: a list of lists. Tensors only accept rectangular shapes (think matrices). This "array" is already of rectangular shape, so converting it to a tensor is easy:
-
-{#if fw === 'pt'}
-```py
-import torch
-
-model_inputs = torch.tensor(encoded_sequences)
-```
-{:else}
-```py
-import tensorflow as tf
-
-model_inputs = tf.constant(encoded_sequences)
-```
-{/if}
-
-### Using the tensors as inputs to the model
-
-Making use of the tensors with the model is extremely simple — we just call the model with the inputs:
-
-```py
-output = model(model_inputs)
-```
-
-While the model accepts a lot of different arguments, only the input IDs are necessary. We'll explain what the other arguments do and when they are required later, 
-but first we need to take a closer look at the tokenizers that build the inputs that a Transformer model can understand.

From 02dc002eac28d7f6b39440e4259494a9c77b40f7 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 7 Apr 2022 19:48:14 +0100
Subject: [PATCH 039/127] Delete 4.mdx

---
 4.mdx | 240 ----------------------------------------------------------
 1 file changed, 240 deletions(-)
 delete mode 100644 4.mdx

diff --git a/4.mdx b/4.mdx
deleted file mode 100644
index ccebe04ec..000000000
--- a/4.mdx
+++ /dev/null
@@ -1,240 +0,0 @@
-<FrameworkSwitchCourse {fw} />
-
-# Tokenizers
-
-{#if fw === 'pt'}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section4_pt.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section4_pt.ipynb"},
-]} />
-
-{:else}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section4_tf.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section4_tf.ipynb"},
-]} />
-
-{/if}
-
-<Youtube id="VFp38yj8h3A"/>
-
-Tokenizers are one of the core components of the NLP pipeline. They serve one purpose: to translate text into data that can be processed by the model. Models can only process numbers, so tokenizers need to convert our text inputs to numerical data. In this section, we'll explore exactly what happens in the tokenization pipeline. 
-
-In NLP tasks, the data that is generally processed is raw text. Here's an example of such text:
-
-```
-Jim Henson was a puppeteer
-```
-
-However, models can only process numbers, so we need to find a way to convert the raw text to numbers. That's what the tokenizers do, and there are a lot of ways to go about this. The goal is to find the most meaningful representation — that is, the one that makes the most sense to the model — and, if possible, the smallest representation.
-
-Let's take a look at some examples of tokenization algorithms, and try to answer some of the questions you may have about tokenization.
-
-## Word-based
-
-<Youtube id="nhJxYji1aho"/>
-
-The first type of tokenizer that comes to mind is _word-based_. It's generally very easy to set up and use with only a few rules, and it often yields decent results. For example, in the image below, the goal is to split the raw text into words and find a numerical representation for each of them:
-
-<div class="flex justify-center">
-  <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/word_based_tokenization.svg" alt="An example of word-based tokenization."/>
-  <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/word_based_tokenization-dark.svg" alt="An example of word-based tokenization."/>
-</div>
-
-There are different ways to split the text. For example, we could could use whitespace to tokenize the text into words by applying Python's `split()` function:
-
-```py
-tokenized_text = "Jim Henson was a puppeteer".split()
-print(tokenized_text)
-```
-
-```python out
-['Jim', 'Henson', 'was', 'a', 'puppeteer']
-```
-
-There are also variations of word tokenizers that have extra rules for punctuation. With this kind of tokenizer, we can end up with some pretty large "vocabularies," where a vocabulary is defined by the total number of independent tokens that we have in our corpus.
-
-Each word gets assigned an ID, starting from 0 and going up to the size of the vocabulary. The model uses these IDs to identify each word.
-
-If we want to completely cover a language with a word-based tokenizer, we'll need to have an identifier for each word in the language, which will generate a huge amount of tokens. For example, there are over 500,000 words in the English language, so to build a map from each word to an input ID we'd need to keep track of that many IDs. Furthermore, words like "dog" are represented differently from words like "dogs", and the model will initially have no way of knowing that "dog" and "dogs" are similar: it will identify the two words as unrelated. The same applies to other similar words, like "run" and "running", which the model will not see as being similar initially.
-
-Finally, we need a custom token to represent words that are not in our vocabulary. This is known as the "unknown" token, often represented as "[UNK]" or "&lt;unk&gt;". It's generally a bad sign if you see that the tokenizer is producing a lot of these tokens, as it wasn't able to retrieve a sensible representation of a word and you're losing information along the way. The goal when crafting the vocabulary is to do it in such a way that the tokenizer tokenizes as few words as possible into the unknown token.
-
-One way to reduce the amount of unknown tokens is to go one level deeper, using a _character-based_ tokenizer.
-
-## Character-based
-
-<Youtube id="ssLq_EK2jLE"/>
-
-Character-based tokenizers split the text into characters, rather than words. This has two primary benefits:
-
-- The vocabulary is much smaller.
-- There are much fewer out-of-vocabulary (unknown) tokens, since every word can be built from characters.
-
-But here too some questions arise concerning spaces and punctuation:
-
-<div class="flex justify-center">
-  <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/character_based_tokenization.svg" alt="An example of character-based tokenization."/>
-  <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/character_based_tokenization-dark.svg" alt="An example of character-based tokenization."/>
-</div>
-
-This approach isn't perfect either. Since the representation is now based on characters rather than words, one could argue that, intuitively, it's less meaningful: each character doesn't mean a lot on its own, whereas that is the case with words. However, this again differs according to the language; in Chinese, for example, each character carries more information than a character in a Latin language.
-
-Another thing to consider is that we'll end up with a very large amount of tokens to be processed by our model: whereas a word would only be a single token with a word-based tokenizer, it can easily turn into 10 or more tokens when converted into characters.
-
-To get the best of both worlds, we can use a third technique that combines the two approaches: *subword tokenization*.
-
-## Subword tokenization
-
-<Youtube id="zHvTiHr506c"/>
-
-Subword tokenization algorithms rely on the principle that frequently used words should not be split into smaller subwords, but rare words should be decomposed into meaningful subwords.
-
-For instance, "annoyingly" might be considered a rare word and could be decomposed into "annoying" and "ly". These are both likely to appear more frequently as standalone subwords, while at the same time the meaning of "annoyingly" is kept by the composite meaning of "annoying" and "ly".
-
-Here is an example showing how a subword tokenization algorithm would tokenize the sequence "Let's do tokenization!":
-
-<div class="flex justify-center">
-  <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/bpe_subword.svg" alt="A subword tokenization algorithm."/>
-  <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/bpe_subword-dark.svg" alt="A subword tokenization algorithm."/>
-</div>
-
-These subwords end up providing a lot of semantic meaning: for instance, in the example above "tokenization" was split into "token" and "ization", two tokens that have a semantic meaning while being space-efficient (only two tokens are needed to represent a long word). This allows us to have relatively good coverage with small vocabularies, and close to no unknown tokens.
-
-This approach is especially useful in agglutinative languages such as Turkish, where you can form (almost) arbitrarily long complex words by stringing together subwords.
-
-### And more!
-
-Unsurprisingly, there are many more techniques out there. To name a few:
-
-- Byte-level BPE, as used in GPT-2
-- WordPiece, as used in BERT
-- SentencePiece or Unigram, as used in several multilingual models
-
-You should now have sufficient knowledge of how tokenizers work to get started with the API.
-
-## Loading and saving
-
-Loading and saving tokenizers is as simple as it is with models. Actually, it's based on the same two methods: `from_pretrained()` and `save_pretrained()`. These methods will load or save the algorithm used by the tokenizer (a bit like the *architecture* of the model) as well as its vocabulary (a bit like the *weights* of the model).
-
-Loading the BERT tokenizer trained with the same checkpoint as BERT is done the same way as loading the model, except we use the `BertTokenizer` class:
-
-```py
-from transformers import BertTokenizer
-
-tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
-```
-
-{#if fw === 'pt'}
-Similar to `AutoModel`, the `AutoTokenizer` class will grab the proper tokenizer class in the library based on the checkpoint name, and can be used directly with any checkpoint:
-
-{:else}
-Similar to `TFAutoModel`, the `AutoTokenizer` class will grab the proper tokenizer class in the library based on the checkpoint name, and can be used directly with any checkpoint:
-
-{/if}
-
-```py
-from transformers import AutoTokenizer
-
-tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
-```
-
-We can now use the tokenizer as shown in the previous section:
-
-```python
-tokenizer("Using a Transformer network is simple")
-```
-
-```python out
-{'input_ids': [101, 7993, 170, 11303, 1200, 2443, 1110, 3014, 102],
- 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0],
- 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}
-```
-
-Saving a tokenizer is identical to saving a model:
-
-```py
-tokenizer.save_pretrained("directory_on_my_computer")
-```
-
-We'll talk more about `token_type_ids` in [Chapter 3](/course/chapter3), and we'll explain the `attention_mask` key a little later. First, let's see how the `input_ids` are generated. To do this, we'll need to look at the intermediate methods of the tokenizer.
-
-## Encoding
-
-<Youtube id="Yffk5aydLzg"/>
-
-Translating text to numbers is known as _encoding_. Encoding is done in a two-step process: the tokenization, followed by the conversion to input IDs.
-
-As we've seen, the first step is to split the text into words (or parts of words, punctuation symbols, etc.), usually called *tokens*. There are multiple rules that can govern that process, which is why we need to instantiate the tokenizer using the name of the model, to make sure we use the same rules that were used when the model was pretrained.
-
-The second step is to convert those tokens into numbers, so we can build a tensor out of them and feed them to the model. To do this, the tokenizer has a *vocabulary*, which is the part we download when we instantiate it with the `from_pretrained()` method. Again, we need to use the same vocabulary used when the model was pretrained.
-
-To get a better understanding of the two steps, we'll explore them separately. Note that we will use some methods that perform parts of the tokenization pipeline separately to show you the intermediate results of those steps, but in practice, you should call the tokenizer directly on your inputs (as shown in the section 2).
-
-### Tokenization
-
-The tokenization process is done by the `tokenize()` method of the tokenizer:
-
-```py
-from transformers import AutoTokenizer
-
-tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
-
-sequence = "Using a Transformer network is simple"
-tokens = tokenizer.tokenize(sequence)
-
-print(tokens)
-```
-
-The output of this method is a list of strings, or tokens:
-
-```python out
-['Using', 'a', 'transform', '##er', 'network', 'is', 'simple']
-```
-
-This tokenizer is a subword tokenizer: it splits the words until it obtains tokens that can be represented by its vocabulary. That's the case here with `transformer`, which is split into two tokens: `transform` and `##er`.
-
-### From tokens to input IDs
-
-The conversion to input IDs is handled by the `convert_tokens_to_ids()` tokenizer method:
-
-```py
-ids = tokenizer.convert_tokens_to_ids(tokens)
-
-print(ids)
-```
-
-```python out
-[7993, 170, 11303, 1200, 2443, 1110, 3014]
-```
-
-These outputs, once converted to the appropriate framework tensor, can then be used as inputs to a model as seen earlier in this chapter.
-
-<Tip>
-
-✏️ **Try it out!** Replicate the two last steps (tokenization and conversion to input IDs) on the input sentences we used in section 2 ("I've been waiting for a HuggingFace course my whole life." and "I hate this so much!"). Check that you get the same input IDs we got earlier!
-
-</Tip>
-
-## Decoding
-
-*Decoding* is going the other way around: from vocabulary indices, we want to get a string. This can be done with the `decode()` method as follows:
-
-```py
-decoded_string = tokenizer.decode([7993, 170, 11303, 1200, 2443, 1110, 3014])
-print(decoded_string)
-```
-
-```python out
-'Using a Transformer network is simple'
-```
-
-Note that the `decode` method not only converts the indices back to tokens, but also groups together the tokens that were part of the same words to produce a readable sentence. This behavior will be extremely useful when we use models that predict new text (either text generated from a prompt, or for sequence-to-sequence problems like translation or summarization).
-
-By now you should understand the atomic operations a tokenizer can handle: tokenization, conversion to IDs, and converting IDs back to a string. However, we've just scraped the tip of the iceberg. In the following section, we'll take our approach to its limits and take a look at how to overcome them.

From 9488dfa68dccc03eb937024e4997547e40845470 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 7 Apr 2022 19:48:26 +0100
Subject: [PATCH 040/127] Delete 5.mdx

---
 5.mdx | 338 ----------------------------------------------------------
 1 file changed, 338 deletions(-)
 delete mode 100644 5.mdx

diff --git a/5.mdx b/5.mdx
deleted file mode 100644
index 5a692aa19..000000000
--- a/5.mdx
+++ /dev/null
@@ -1,338 +0,0 @@
-<FrameworkSwitchCourse {fw} />
-
-# Handling multiple sequences
-
-{#if fw === 'pt'}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section5_pt.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section5_pt.ipynb"},
-]} />
-
-{:else}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section5_tf.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section5_tf.ipynb"},
-]} />
-
-{/if}
-
-{#if fw === 'pt'}
-<Youtube id="M6adb1j2jPI"/>
-{:else}
-<Youtube id="ROxrFOEbsQE"/>
-{/if}
-
-In the previous section, we explored the simplest of use cases: doing inference on a single sequence of a small length. However, some questions emerge already:
-
-- How do we handle multiple sequences?
-- How do we handle multiple sequences *of different lengths*?
-- Are vocabulary indices the only inputs that allow a model to work well?
-- Is there such a thing as too long a sequence?
-
-Let's see what kinds of problems these questions pose, and how we can solve them using the 🤗 Transformers API.
-
-## Models expect a batch of inputs
-
-In the previous exercise you saw how sequences get translated into lists of numbers. Let's convert this list of numbers to a tensor and send it to the model:
-
-{#if fw === 'pt'}
-```py
-import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
-
-sequence = "I've been waiting for a HuggingFace course my whole life."
-
-tokens = tokenizer.tokenize(sequence)
-ids = tokenizer.convert_tokens_to_ids(tokens)
-input_ids = torch.tensor(ids)
-# This line will fail.
-model(input_ids)
-```
-
-```python out
-IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
-```
-{:else}
-```py
-import tensorflow as tf
-from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
-
-sequence = "I've been waiting for a HuggingFace course my whole life."
-
-tokens = tokenizer.tokenize(sequence)
-ids = tokenizer.convert_tokens_to_ids(tokens)
-input_ids = tf.constant(ids)
-# This line will fail.
-model(input_ids)
-```
-
-```py out
-InvalidArgumentError: Input to reshape is a tensor with 14 values, but the requested shape has 196 [Op:Reshape]
-```
-{/if}
-
-Oh no! Why did this fail? "We followed the steps from the pipeline in section 2.
-
-The problem is that we sent a single sequence to the model, whereas 🤗 Transformers models expect multiple sentences by default. Here we tried to do everything the tokenizer did behind the scenes when we applied it to a `sequence`, but if you look closely, you'll see that it didn't just convert the list of input IDs into a tensor, it added a dimension on top of it:
-
-{#if fw === 'pt'}
-```py
-tokenized_inputs = tokenizer(sequence, return_tensors="pt")
-print(tokenized_inputs["input_ids"])
-```
-
-```python out
-tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
-          2607,  2026,  2878,  2166,  1012,   102]])
-```
-{:else}
-```py
-tokenized_inputs = tokenizer(sequence, return_tensors="tf")
-print(tokenized_inputs["input_ids"])
-```
-
-```py out
-<tf.Tensor: shape=(1, 16), dtype=int32, numpy=
-array([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662,
-        12172,  2607,  2026,  2878,  2166,  1012,   102]], dtype=int32)>
-```
-{/if}
-
-Let's try again and add a new dimension:
-
-{#if fw === 'pt'}
-```py
-import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
-
-sequence = "I've been waiting for a HuggingFace course my whole life."
-
-tokens = tokenizer.tokenize(sequence)
-ids = tokenizer.convert_tokens_to_ids(tokens)
-
-input_ids = torch.tensor([ids])
-print("Input IDs:", input_ids)
-
-output = model(input_ids)
-print("Logits:", output.logits)
-```
-{:else}
-```py
-import tensorflow as tf
-from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
-
-sequence = "I've been waiting for a HuggingFace course my whole life."
-
-tokens = tokenizer.tokenize(sequence)
-ids = tokenizer.convert_tokens_to_ids(tokens)
-
-input_ids = tf.constant([ids])
-print("Input IDs:", input_ids)
-
-output = model(input_ids)
-print("Logits:", output.logits)
-```
-{/if}
-
-We print the input IDs as well as the resulting logits — here's the output:
-
-{#if fw === 'pt'}
-```python out
-Input IDs: [[ 1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,  2607, 2026,  2878,  2166,  1012]]
-Logits: [[-2.7276,  2.8789]]
-```
-{:else}
-```py out
-Input IDs: tf.Tensor(
-[[ 1045  1005  2310  2042  3403  2005  1037 17662 12172  2607  2026  2878
-   2166  1012]], shape=(1, 14), dtype=int32)
-Logits: tf.Tensor([[-2.7276208  2.8789377]], shape=(1, 2), dtype=float32)
-```
-{/if}
-
-*Batching* is the act of sending multiple sentences through the model, all at once. If you only have one sentence, you can just build a batch with a single sequence: 
-
-```
-batched_ids = [ids, ids]
-```
-
-This is a batch of two identical sequences!
-
-<Tip>
-
-✏️ **Try it out!** Convert this `batched_ids` list into a tensor and pass it through your model. Check that you obtain the same logits as before (but twice)!
-
-</Tip>
-
-Batching allows the model to work when you feed it multiple sentences. Using multiple sequences is just as simple as building a batch with a single sequence. There's a second issue, though. When you're trying to batch together two (or more) sentences, they might be of different lengths. If you've ever worked with tensors before, you know that they need to be of rectangular shape, so you won't be able to convert the list of input IDs into a tensor directly. To work around this problem, we usually *pad* the inputs.
-
-## Padding the inputs
-
-The following list of lists cannot be converted to a tensor:
-
-```py no-format
-batched_ids = [
-    [200, 200, 200],
-    [200, 200]
-]
-```
-
-In order to work around this, we'll use *padding* to make our tensors have a rectangular shape. Padding makes sure all our sentences have the same length by adding a special word called the *padding token* to the sentences with fewer values. For example, if you have 10 sentences with 10 words and 1 sentence with 20 words, padding will ensure all the sentences have 20 words. In our example, the resulting tensor looks like this:
-
-```py no-format
-padding_id = 100
-
-batched_ids = [
-    [200, 200, 200],
-    [200, 200, padding_id],
-]
-```
-
-The padding token ID can be found in `tokenizer.pad_token_id`. Let's use it and send our two sentences through the model individually and batched together:
-
-{#if fw === 'pt'}
-```py no-format
-model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
-
-sequence1_ids = [[200, 200, 200]]
-sequence2_ids = [[200, 200]]
-batched_ids = [
-    [200, 200, 200],
-    [200, 200, tokenizer.pad_token_id],
-]
-
-print(model(torch.tensor(sequence1_ids)).logits)
-print(model(torch.tensor(sequence2_ids)).logits)
-print(model(torch.tensor(batched_ids)).logits)
-```
-
-```python out
-tensor([[ 1.5694, -1.3895]], grad_fn=<AddmmBackward>)
-tensor([[ 0.5803, -0.4125]], grad_fn=<AddmmBackward>)
-tensor([[ 1.5694, -1.3895],
-        [ 1.3373, -1.2163]], grad_fn=<AddmmBackward>)
-```
-{:else}
-```py no-format
-model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
-
-sequence1_ids = [[200, 200, 200]]
-sequence2_ids = [[200, 200]]
-batched_ids = [
-    [200, 200, 200],
-    [200, 200, tokenizer.pad_token_id],
-]
-
-print(model(tf.constant(sequence1_ids)).logits)
-print(model(tf.constant(sequence2_ids)).logits)
-print(model(tf.constant(batched_ids)).logits)
-```
-
-```py out
-tf.Tensor([[ 1.5693678 -1.3894581]], shape=(1, 2), dtype=float32)
-tf.Tensor([[ 0.5803005  -0.41252428]], shape=(1, 2), dtype=float32)
-tf.Tensor(
-[[ 1.5693681 -1.3894582]
- [ 1.3373486 -1.2163193]], shape=(2, 2), dtype=float32)
-```
-{/if}
-
-There's something wrong with the logits in our batched predictions: the second row should be the same as the logits for the second sentence, but we've got completely different values!
-
-This is because the key feature of Transformer models is attention layers that *contextualize* each token. These will take into account the padding tokens since they attend to all of the tokens of a sequence. To get the same result when passing individual sentences of different lengths through the model or when passing a batch with the same sentences and padding applied, we need to tell those attention layers to ignore the padding tokens. This is done by using an attention mask.
-
-## Attention masks
-
-*Attention masks* are tensors with the exact same shape as the input IDs tensor, filled with 0s and 1s: 1s indicate the corresponding tokens should be attended to, and 0s indicate the corresponding tokens should not be attended to (i.e., they should be ignored by the attention layers of the model).
-
-Let's complete the previous example with an attention mask:
-
-{#if fw === 'pt'}
-```py no-format
-batched_ids = [
-    [200, 200, 200],
-    [200, 200, tokenizer.pad_token_id],
-]
-
-attention_mask = [
-    [1, 1, 1],
-    [1, 1, 0],
-]
-
-outputs = model(torch.tensor(batched_ids), attention_mask=torch.tensor(attention_mask))
-print(outputs.logits)
-```
-
-```python out
-tensor([[ 1.5694, -1.3895],
-        [ 0.5803, -0.4125]], grad_fn=<AddmmBackward>)
-```
-{:else}
-```py no-format
-batched_ids = [
-    [200, 200, 200],
-    [200, 200, tokenizer.pad_token_id],
-]
-
-attention_mask = [
-    [1, 1, 1],
-    [1, 1, 0],
-]
-
-outputs = model(tf.constant(batched_ids), attention_mask=tf.constant(attention_mask))
-print(outputs.logits)
-```
-
-```py out
-tf.Tensor(
-[[ 1.5693681  -1.3894582 ]
- [ 0.5803021  -0.41252586]], shape=(2, 2), dtype=float32)
-```
-{/if}
-
-Now we get the same logits for the second sentence in the batch.
-
-Notice how the last value of the second sequence is a padding ID, which is a 0 value in the attention mask.
-
-<Tip>
-
-✏️ **Try it out!** Apply the tokenization manually on the two sentences used in section 2 ("I've been waiting for a HuggingFace course my whole life." and "I hate this so much!"). Pass them through the model and check that you get the same logits as in section 2. Now batch them together using the padding token, then create the proper attention mask. Check that you obtain the same results when going through the model!
-
-</Tip>
-
-## Longer sequences
-
-With Transformer models, there is a limit to the lengths of the sequences we can pass to the models. Most models handle sequences of up to 512 or 1024 tokens, and will crash when asked to process longer sequences. There are two solutions to this problem:
-
-- Use a model with a longer supported sequence length.
-- Truncate your sequences.
-
-Models have different supported sequence lengths, and some specialize in handling very long sequences. [Longformer](https://huggingface.co/transformers/model_doc/longformer.html) is one example, and another is [LED](https://huggingface.co/transformers/model_doc/led.html). If you're working on a task that requires very long sequences, we recommend you take a look at those models.
-
-Otherwise, we recommend you truncate your sequences by specifying the `max_sequence_length` parameter:
-
-```py
-sequence = sequence[:max_sequence_length]
-```

From ecbf2b214b611d479ff53f319da246ff5c65dbd0 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 7 Apr 2022 19:48:37 +0100
Subject: [PATCH 041/127] Delete 6.mdx

---
 6.mdx | 164 ----------------------------------------------------------
 1 file changed, 164 deletions(-)
 delete mode 100644 6.mdx

diff --git a/6.mdx b/6.mdx
deleted file mode 100644
index 974123515..000000000
--- a/6.mdx
+++ /dev/null
@@ -1,164 +0,0 @@
-<FrameworkSwitchCourse {fw} />
-
-# Putting it all together
-
-{#if fw === 'pt'}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section6_pt.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section6_pt.ipynb"},
-]} />
-
-{:else}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section6_tf.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section6_tf.ipynb"},
-]} />
-
-{/if}
-
-In the last few sections, we've been trying our best to do most of the work by hand. We've explored how tokenizers work and looked at tokenization, conversion to input IDs, padding, truncation, and attention masks.
-
-However, as we saw in section 2, the 🤗 Transformers API can handle all of this for us with a high-level function that we'll dive into here. When you call your `tokenizer` directly on the sentence, you get back inputs that are ready to pass through your model:
-
-```py
-from transformers import AutoTokenizer
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-
-sequence = "I've been waiting for a HuggingFace course my whole life."
-
-model_inputs = tokenizer(sequence)
-```
-
-Here, the `model_inputs` variable contains everything that's necessary for a model to operate well. For DistilBERT, that includes the input IDs as well as the attention mask. Other models that accept additional inputs will also have those output by the `tokenizer` object.
-
-As we'll see in some examples below, this method is very powerful. First, it can tokenize a single sequence:
-
-```py
-sequence = "I've been waiting for a HuggingFace course my whole life."
-
-model_inputs = tokenizer(sequence)
-```
-
-It also handles multiple sequences at a time, with no change in the API:
-
-```py
-sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
-
-model_inputs = tokenizer(sequences)
-```
-
-It can pad according to several objectives:
-
-```py
-# Will pad the sequences up to the maximum sequence length
-model_inputs = tokenizer(sequences, padding="longest")
-
-# Will pad the sequences up to the model max length
-# (512 for BERT or DistilBERT)
-model_inputs = tokenizer(sequences, padding="max_length")
-
-# Will pad the sequences up to the specified max length
-model_inputs = tokenizer(sequences, padding="max_length", max_length=8)
-```
-
-It can also truncate sequences:
-
-```py
-sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
-
-# Will truncate the sequences that are longer than the model max length
-# (512 for BERT or DistilBERT)
-model_inputs = tokenizer(sequences, truncation=True)
-
-# Will truncate the sequences that are longer than the specified max length
-model_inputs = tokenizer(sequences, max_length=8, truncation=True)
-```
-
-The `tokenizer` object can handle the conversion to specific framework tensors, which can then be directly sent to the model. For example, in the following code sample we are prompting the tokenizer to return tensors from the different frameworks — `"pt"` returns PyTorch tensors, `"tf"` returns TensorFlow tensors, and `"np"` returns NumPy arrays:
-
-```py
-sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
-
-# Returns PyTorch tensors
-model_inputs = tokenizer(sequences, padding=True, return_tensors="pt")
-
-# Returns TensorFlow tensors
-model_inputs = tokenizer(sequences, padding=True, return_tensors="tf")
-
-# Returns NumPy arrays
-model_inputs = tokenizer(sequences, padding=True, return_tensors="np")
-```
-
-## Special tokens
-
-If we take a look at the input IDs returned by the tokenizer, we will see they are a tiny bit different from what we had earlier:
-
-```py
-sequence = "I've been waiting for a HuggingFace course my whole life."
-
-model_inputs = tokenizer(sequence)
-print(model_inputs["input_ids"])
-
-tokens = tokenizer.tokenize(sequence)
-ids = tokenizer.convert_tokens_to_ids(tokens)
-print(ids)
-```
-
-```python out
-[101, 1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012, 102]
-[1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012]
-```
-
-One token ID was added at the beginning, and one at the end. Let's decode the two sequences of IDs above to see what this is about:
-
-```py
-print(tokenizer.decode(model_inputs["input_ids"]))
-print(tokenizer.decode(ids))
-```
-
-```python out
-"[CLS] i've been waiting for a huggingface course my whole life. [SEP]"
-"i've been waiting for a huggingface course my whole life."
-```
-
-The tokenizer added the special word `[CLS]` at the beginning and the special word `[SEP]` at the end. This is because the model was pretrained with those, so to get the same results for inference we need to add them as well. Note that some models don't add special words, or add different ones; models may also add these special words only at the beginning, or only at the end. In any case, the tokenizer knows which ones are expected and will deal with this for you.
-
-## Wrapping up: From tokenizer to model
-
-Now that we've seen all the individual steps the `tokenizer` object uses when applied on texts, let's see one final time how it can handle multiple sequences (padding!), very long sequences (truncation!), and multiple types of tensors with its main API:
-
-{#if fw === 'pt'}
-```py
-import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
-sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
-
-tokens = tokenizer(sequences, padding=True, truncation=True, return_tensors="pt")
-output = model(**tokens)
-```
-{:else}
-```py
-import tensorflow as tf
-from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
-sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
-
-tokens = tokenizer(sequences, padding=True, truncation=True, return_tensors="tf")
-output = model(**tokens)
-```
-{/if}

From c4e753d01565fbe44df968adcf61b1473aa5818d Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 7 Apr 2022 19:48:48 +0100
Subject: [PATCH 042/127] Delete 7.mdx

---
 7.mdx | 13 -------------
 1 file changed, 13 deletions(-)
 delete mode 100644 7.mdx

diff --git a/7.mdx b/7.mdx
deleted file mode 100644
index 122728d08..000000000
--- a/7.mdx
+++ /dev/null
@@ -1,13 +0,0 @@
-# Basic usage completed!
-
-Great job following the course up to here! To recap, in this chapter you:
-
-- Learned the basic building blocks of a Transformer model.
-- Learned what makes up a tokenization pipeline.
-- Saw how to use a Transformer model in practice.
-- Learned how to leverage a tokenizer to convert text to tensors that are understandable by the model.
-- Set up a tokenizer and a model together to get from text to predictions.
-- Learned the limitations of input IDs, and learned about attention masks.
-- Played around with versatile and configurable tokenizer methods.
-
-From now on, you should be able to freely navigate the 🤗 Transformers docs: the vocabulary will sound familiar, and you've already seen the methods that you'll use the majority of the time.

From 96c53e7db813cbae6460efbf3d6f08951f13fb52 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 7 Apr 2022 19:48:58 +0100
Subject: [PATCH 043/127] Delete 8.mdx

---
 8.mdx | 305 ----------------------------------------------------------
 1 file changed, 305 deletions(-)
 delete mode 100644 8.mdx

diff --git a/8.mdx b/8.mdx
deleted file mode 100644
index 43f0a8c9c..000000000
--- a/8.mdx
+++ /dev/null
@@ -1,305 +0,0 @@
-<FrameworkSwitchCourse {fw} />
-
-<!-- DISABLE-FRONTMATTER-SECTIONS -->
-
-# End-of-chapter quiz
-
-### 1. What is the order of the language modeling pipeline?
-
-<Question
-	choices={[
-		{
-			text: "First, the model, which handles text and returns raw predictions. The tokenizer then makes sense of these predictions and converts them back to text when needed.",
-			explain: "The model cannot understand text! The tokenizer must first tokenize the text and convert it to IDs so that it is understandable by the model."
-		},
-		{
-			text: "First, the tokenizer, which handles text and returns IDs. The model handles these IDs and outputs a prediction, which can be some text.",
-			explain: "The model's prediction cannot be text straight away. The tokenizer has to be used in order to convert the prediction back to text!"
-		},
-		{
-			text: "The tokenizer handles text and returns IDs. The model handles these IDs and outputs a prediction. The tokenizer can then be used once again to convert these predictions back to some text.",
-			explain: "Correct! The tokenizer can be used for both tokenizing and de-tokenizing.",
-            correct: true
-		}
-	]}
-/>
-
-### 2. How many dimensions does the tensor output by the base Transformer model have, and what are they?
-
-<Question
-	choices={[
-		{
-			text: "2: The sequence length and the batch size",
-			explain: "False! The tensor output by the model has a third dimension: hidden size."
-		},
-		{
-			text: "2: The sequence length and the hidden size",
-			explain: "False! All Transformer models handle batches, even with a single sequence; that would be a batch size of 1!"
-		},
-		{
-			text: "3: The sequence length, the batch size, and the hidden size",
-			explain: "Correct!",
-            correct: true
-		}
-	]}
-/>
-
-### 3. Which of the following is an example of subword tokenization?
-
-<Question
-	choices={[
-		{
-			text: "WordPiece",
-			explain: "Yes, that's one example of subword tokenization!",
-            correct: true
-		},
-		{
-			text: "Character-based tokenization",
-			explain: "Character-based tokenization is not a type of subword tokenization."
-		},
-		{
-			text: "Splitting on whitespace and punctuation",
-			explain: "That's a word-based tokenization scheme!"
-		},
-		{
-			text: "BPE",
-			explain: "Yes, that's one example of subword tokenization!",
-            correct: true
-        },
-		{
-			text: "Unigram",
-			explain: "Yes, that's one example of subword tokenization!",
-            correct: true
-        },
-		{
-			text: "None of the above",
-			explain: "Incorrect!"
-        }
-	]}
-/>
-
-### 4. What is a model head?
-
-<Question
-	choices={[
-		{
-			text: "A component of the base Transformer network that redirects tensors to their correct layers",
-			explain: "Incorrect! There's no such component."
-		},
-		{
-			text: "Also known as the self-attention mechanism, it adapts the representation of a token according to the other tokens of the sequence",
-			explain: "Incorrect! The self-attention layer does contain attention \"heads,\" but these are not adaptation heads."
-		},
-		{
-			text: "An additional component, usually made up of one or a few layers, to convert the transformer predictions to a task-specific output",
-			explain: "That's right. Adaptation heads, also known simply as heads, come up in different forms: language modeling heads, question answering heads, sequence classification heads... ",
-			correct: true
-		} 
-	]}
-/>
-
-{#if fw === 'pt'}
-### 5. What is an AutoModel?
-
-<Question
-	choices={[
-		{
-			text: "A model that automatically trains on your data",
-			explain: "Incorrect. Are you mistaking this with our <a href='https://huggingface.co/autonlp'>AutoNLP</a> product?"
-		},
-		{
-			text: "An object that returns the correct architecture based on the checkpoint",
-			explain: "Exactly: the <code>AutoModel</code> only needs to know the checkpoint from which to initialize to return the correct architecture.",
-			correct: true
-		},
-		{
-			text: "A model that automatically detects the language used for its inputs to load the correct weights",
-			explain: "Incorrect; while some checkpoints and models are capable of handling multiple languages, there are no built-in tools for automatic checkpoint selection according to language. You should head over to the <a href='https://huggingface.co/models'>Model Hub</a> to find the best checkpoint for your task!"
-		} 
-	]}
-/>
-
-{:else}
-### 5. What is a TFAutoModel?
-
-<Question
-	choices={[
-		{
-			text: "A model that automatically trains on your data",
-			explain: "Incorrect. Are you mistaking this with our <a href='https://huggingface.co/autonlp'>AutoNLP</a> product?"
-		},
-		{
-			text: "An object that returns the correct architecture based on the checkpoint",
-			explain: "Exactly: the <code>TFAutoModel</code> only needs to know the checkpoint from which to initialize to return the correct architecture.",
-			correct: true
-		},
-		{
-			text: "A model that automatically detects the language used for its inputs to load the correct weights",
-			explain: "Incorrect; while some checkpoints and models are capable of handling multiple languages, there are no built-in tools for automatic checkpoint selection according to language. You should head over to the <a href='https://huggingface.co/models'>Model Hub</a> to find the best checkpoint for your task!"
-		} 
-	]}
-/>
-
-{/if}
-
-### 6. What are the techniques to be aware of when batching sequences of different lengths together?
-
-<Question
-	choices={[
-		{
-			text: "Truncating",
-			explain: "Yes, truncation is a correct way of evening out sequences so that they fit in a rectangular shape. Is it the only one, though?",
-			correct: true
-		},
-		{
-			text: "Returning tensors",
-			explain: "While the other techniques allow you to return rectangular tensors, returning tensors isn't helpful when batching sequences together."
-		},
-		{
-			text: "Padding",
-			explain: "Yes, padding is a correct way of evening out sequences so that they fit in a rectangular shape. Is it the only one, though?",
-			correct: true
-		}, 
-		{
-			text: "Attention masking",
-			explain: "Absolutely! Attention masks are of prime importance when handling sequences of different lengths. That's not the only technique to be aware of, however.",
-			correct: true
-		} 
-	]}
-/>
-
-### 7. What is the point of applying a SoftMax function to the logits output by a sequence classification model?
-
-<Question
-	choices={[
-		{
-			text: "It softens the logits so that they're more reliable.",
-			explain: "No, the SoftMax function does not affect the reliability of results."
-		},
-		{
-			text: "It applies a lower and upper bound so that they're understandable.",
-			explain: "Correct! The resulting values are bounded between 0 and 1. That's not the only reason we use a SoftMax function, though.",
-            correct: true
-		},
-		{
-			text: "The total sum of the output is then 1, resulting in a possible probabilistic interpretation.",
-			explain: "Correct! That's not the only reason we use a SoftMax function, though.",
-            correct: true
-		}
-	]}
-/>
-
-### 8. What method is most of the tokenizer API centered around?
-
-<Question
-	choices={[
-		{
-			text: "<code>encode</code>, as it can encode text into IDs and IDs into predictions",
-			explain: "Wrong! While the <code>encode</code> method does exist on tokenizers, it does not exist on models."
-		},
-		{
-			text: "Calling the tokenizer object directly.",
-			explain: "Exactly! The <code>__call__</code> method of the tokenizer is a very powerful method which can handle pretty much anything. It is also the method used to retrieve predictions from a model.",
-			correct: true
-		},
-		{
-			text: "<code>pad</code>",
-			explain: "Wrong! Padding is very useful, but it's just one part of the tokenizer API."
-		},
-		{
-			text: "<code>tokenize</code>",
-			explain: "The <code>tokenize</code> method is arguably one of the most useful methods, but it isn't the core of the tokenizer API."
-		}
-	]}
-/>
-
-### 9. What does the `result` variable contain in this code sample?
-
-```py
-from transformers import AutoTokenizer
-
-tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
-result = tokenizer.tokenize("Hello!")
-```
-
-<Question
-	choices={[
-		{
-			text: "A list of strings, each string being a token",
-			explain: "Absolutely! Convert this to IDs, and send them to a model!",
-            correct: true
-		},
-		{
-			text: "A list of IDs",
-			explain: "Incorrect; that's what the <code>__call__</code> or <code>convert_tokens_to_ids</code> method is for!"
-		},
-		{
-			text: "A string containing all of the tokens",
-			explain: "This would be suboptimal, as the goal is to split the string into multiple tokens."
-		}
-	]}
-/>
-
-{#if fw === 'pt'}
-### 10. Is there something wrong with the following code?
-
-```py
-from transformers import AutoTokenizer, AutoModel
-
-tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
-model = AutoModel.from_pretrained("gpt2")
-
-encoded = tokenizer("Hey!", return_tensors="pt")
-result = model(**encoded)
-```
-
-<Question
-	choices={[
-		{
-			text: "No, it seems correct.",
-			explain: "Unfortunately, coupling a model with a tokenizer that was trained with a different checkpoint is rarely a good idea. The model was not trained to make sense out of this tokenizer's output, so the model output (if it can even run!) will not make any sense."
-		},
-		{
-			text: "The tokenizer and model should always be from the same checkpoint.",
-			explain: "Right!",
-            correct: true
-		},
-		{
-			text: "It's good practice to pad and truncate with the tokenizer as every input is a batch.",
-			explain: "It's true that every model input needs to be a batch. However, truncating or padding this sequence wouldn't necessarily make sense as there is only one of it, and those are techniques to batch together a list of sentences."
-		}
-	]}
-/>
-
-{:else}
-### 10. Is there something wrong with the following code?
-
-```py
-from transformers import AutoTokenizer, TFAutoModel
-
-tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
-model = TFAutoModel.from_pretrained("gpt2")
-
-encoded = tokenizer("Hey!", return_tensors="tf")
-result = model(**encoded)
-```
-
-<Question
-	choices={[
-		{
-			text: "No, it seems correct.",
-			explain: "Unfortunately, coupling a model with a tokenizer that was trained with a different checkpoint is rarely a good idea. The model was not trained to make sense out of this tokenizer's output, so the model output (if it can even run!) will not make any sense."
-		},
-		{
-			text: "The tokenizer and model should always be from the same checkpoint.",
-			explain: "Right!",
-            correct: true
-		},
-		{
-			text: "It's good practice to pad and truncate with the tokenizer as every input is a batch.",
-			explain: "It's true that every model input needs to be a batch. However, truncating or padding this sequence wouldn't necessarily make sense as there is only one of it, and those are techniques to batch together a list of sentences."
-		}
-	]}
-/>
-
-{/if}

From 83732dd2d45323633ec321c08fb494d4a6b505fb Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 26 Apr 2022 20:38:01 +0100
Subject: [PATCH 044/127] Updates toctree

---
 chapters/it/_toctree.yml | 169 ---------------------------------------
 1 file changed, 169 deletions(-)

diff --git a/chapters/it/_toctree.yml b/chapters/it/_toctree.yml
index d664b9a47..a076b2f04 100644
--- a/chapters/it/_toctree.yml
+++ b/chapters/it/_toctree.yml
@@ -2,172 +2,3 @@
   sections:
   - local: chapter0/1
     title: Introduzione
-
-- title: 1. Modelli Transformer
-  sections:
-  - local: chapter1/1
-    title: Introduzione
-  - local: chapter1/2
-    title: Trattamento Automatico del Linguaggio
-  - local: chapter1/3
-    title: Cosa fanno i Transformers?
-  - local: chapter1/4
-    title: Come funzionano i Transformers?
-  - local: chapter1/5
-    title: Modelli Encoder
-  - local: chapter1/6
-    title: Modelli Decoder
-  - local: chapter1/7
-    title: Modelli Sequence-to-sequence
-  - local: chapter1/8
-    title: Bias e limiti
-  - local: chapter1/9
-    title: Riassunto
-  - local: chapter1/10
-    title: Quiz di fine capitolo
-    quiz: 1
-
-- title: 2. Utilizzo dei Transformers di 🤗
-  sections:
-  - local: chapter2/1
-    title: Introduzione
-  - local: chapter2/2
-    title: Dietro la pipeline
-  - local: chapter2/3
-    title: Modelli
-  - local: chapter2/4
-    title: Tokenizzatori
-  - local: chapter2/5
-    title: Gestione di sequenze multiple
-  - local: chapter2/6
-    title: Mettere tutto insieme
-  - local: chapter2/7
-    title: Usi di base completati!
-  - local: chapter2/8
-    title: Quiz di fine capitolo
-    quiz: 2
-
-- title: 3. Fine-tuning a pretrained model
-  sections:
-  - local: chapter3/1
-    title: Introduction
-  - local: chapter3/2
-    title: Processing the data
-  - local: chapter3/3
-    title: Fine-tuning a model with the Trainer API or Keras
-    local_fw: { pt: chapter3/3, tf: chapter3/3_tf }
-  - local: chapter3/4
-    title: A full training
-  - local: chapter3/5
-    title: Fine-tuning, Check!
-  - local: chapter3/6
-    title: End-of-chapter quiz
-    quiz: 3
-
-- title: 4. Sharing models and tokenizers
-  sections:
-  - local: chapter4/1
-    title: The Hugging Face Hub
-  - local: chapter4/2
-    title: Using pretrained models
-  - local: chapter4/3
-    title: Sharing pretrained models
-  - local: chapter4/4
-    title: Building a model card
-  - local: chapter4/5
-    title: Part 1 completed!
-  - local: chapter4/6
-    title: End-of-chapter quiz
-    quiz: 4
-
-- title: 5. The 🤗 Datasets library
-  sections:
-  - local: chapter5/1
-    title: Introduction
-  - local: chapter5/2
-    title: What if my dataset isn't on the Hub?
-  - local: chapter5/3
-    title: Time to slice and dice
-  - local: chapter5/4
-    title: Big data? 🤗 Datasets to the rescue!
-  - local: chapter5/5
-    title: Creating your own dataset
-  - local: chapter5/6
-    title: Semantic search with FAISS
-  - local: chapter5/7
-    title: 🤗 Datasets, check!
-  - local: chapter5/8
-    title: End-of-chapter quiz
-    quiz: 5
-
-- title: 6. The 🤗 Tokenizers library
-  sections:
-  - local: chapter6/1
-    title: Introduction
-  - local: chapter6/2
-    title: Training a new tokenizer from an old one
-  - local: chapter6/3
-    title: Fast tokenizers' special powers
-  - local: chapter6/3b
-    title: Fast tokenizers in the QA pipeline
-  - local: chapter6/4
-    title: Normalization and pre-tokenization
-  - local: chapter6/5
-    title: Byte-Pair Encoding tokenization
-  - local: chapter6/6
-    title: WordPiece tokenization
-  - local: chapter6/7
-    title: Unigram tokenization
-  - local: chapter6/8
-    title: Building a tokenizer, block by block
-  - local: chapter6/9
-    title: Tokenizers, check!
-  - local: chapter6/10
-    title: End-of-chapter quiz
-    quiz: 6
-
-- title: 7. Main NLP tasks
-  sections:
-  - local: chapter7/1
-    title: Introduction
-  - local: chapter7/2
-    title: Token classification
-  - local: chapter7/3
-    title: Fine-tuning a masked language model
-  - local: chapter7/4
-    title: Translation
-  - local: chapter7/5
-    title: Summarization
-  - local: chapter7/6
-    title: Training a causal language model from scratch
-  - local: chapter7/7
-    title: Question answering
-  - local: chapter7/8
-    title: Mastering NLP
-  - local: chapter7/9
-    title: End-of-chapter quiz
-    quiz: 7
-
-- title: 8. How to ask for help
-  sections:
-  - local: chapter8/1
-    title: Introduction
-  - local: chapter8/2
-    title: What to do when you get an error
-  - local: chapter8/3
-    title: Asking for help on the forums
-  - local: chapter8/4
-    title: Debugging the training pipeline
-    local_fw: { pt: chapter8/4, tf: chapter8/4_tf }
-  - local: chapter8/5
-    title: How to write a good issue
-  - local: chapter8/6
-    title: Part 2 completed!
-  - local: chapter8/7
-    title: End-of-chapter quiz
-    quiz: 8
-
-- title: Hugging Face Course Event
-  sections:
-  - local: event/1
-    title: Part 2 Release Event

From 5cbaee8b57815841e9cc31cf1ab621d6cf8048b6 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 26 Apr 2022 20:38:37 +0100
Subject: [PATCH 045/127] Delete 2.mdx

---
 chapters/it/chapter1/2.mdx | 21 ---------------------
 1 file changed, 21 deletions(-)
 delete mode 100644 chapters/it/chapter1/2.mdx

diff --git a/chapters/it/chapter1/2.mdx b/chapters/it/chapter1/2.mdx
deleted file mode 100644
index 4e4aecc1a..000000000
--- a/chapters/it/chapter1/2.mdx
+++ /dev/null
@@ -1,21 +0,0 @@
-# Natural Language Processing
-
-Before jumping into Transformer models, let's do a quick overview of what natural language processing is and why we care about it.
-
-## What is NLP?
-
-NLP is a field of linguistics and machine learning focused on understanding everything related to human language. The aim of NLP tasks is not only to understand single words individually, but to be able to understand the context of those words.
-
-The following is a list of common NLP tasks, with some examples of each:
-
-- **Classifying whole sentences**: Getting the sentiment of a review, detecting if an email is spam, determining if a sentence is grammatically correct or whether two sentences are logically related or not
-- **Classifying each word in a sentence**: Identifying the grammatical components of a sentence (noun, verb, adjective), or the named entities (person, location, organization)
-- **Generating text content**: Completing a prompt with auto-generated text, filling in the blanks in a text with masked words
-- **Extracting an answer from a text**: Given a question and a context, extracting the answer to the question based on the information provided in the context
-- **Generating a new sentence from an input text**: Translating a text into another language, summarizing a text
-
-NLP isn't limited to written text though. It also tackles complex challenges in speech recognition and computer vision, such as generating a transcript of an audio sample or a description of an image.
-
-## Why is it challenging?
-
-Computers don't process information in the same way as humans. For example, when we read the sentence "I am hungry," we can easily understand its meaning. Similarly, given two sentences such as "I am hungry" and "I am sad," we're able to easily determine how similar they are. For machine learning (ML) models, such tasks are more difficult. The text needs to be processed in a way that enables the model to learn from it. And because language is complex, we need to think carefully about how this processing must be done. There has been a lot of research done on how to represent text, and we will look at some methods in the next chapter.

From c5c9c0d7b3631ca6339a288dd7d01b7bc506f2df Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 26 Apr 2022 20:38:50 +0100
Subject: [PATCH 046/127] Delete 9.mdx

---
 chapters/it/chapter1/9.mdx | 11 -----------
 1 file changed, 11 deletions(-)
 delete mode 100644 chapters/it/chapter1/9.mdx

diff --git a/chapters/it/chapter1/9.mdx b/chapters/it/chapter1/9.mdx
deleted file mode 100644
index 4cd91feac..000000000
--- a/chapters/it/chapter1/9.mdx
+++ /dev/null
@@ -1,11 +0,0 @@
-# Summary
-
-In this chapter, you saw how to approach different NLP tasks using the high-level `pipeline()` function from 🤗 Transformers. You also saw how to search for and use models in the Hub, as well as how to use the Inference API to test the models directly in your browser.
-
-We discussed how Transformer models work at a high level, and talked about the importance of transfer learning and fine-tuning. A key aspect is that you can use the full architecture or only the encoder or decoder, depending on what kind of task you aim to solve. The following table summarizes this:
-
-| Model           | Examples                                   | Tasks                                                                            |
-|-----------------|--------------------------------------------|----------------------------------------------------------------------------------|
-| Encoder         | ALBERT, BERT, DistilBERT, ELECTRA, RoBERTa | Sentence classification, named entity recognition, extractive question answering |
-| Decoder         | CTRL, GPT, GPT-2, Transformer XL           | Text generation                                                                  |
-| Encoder-decoder | BART, T5, Marian, mBART                    | Summarization, translation, generative question answering                        |

From dc62217150d0ff7e332ca94c56089c320d578e76 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 26 Apr 2022 20:39:00 +0100
Subject: [PATCH 047/127] Delete 8.mdx

---
 chapters/it/chapter1/8.mdx | 32 --------------------------------
 1 file changed, 32 deletions(-)
 delete mode 100644 chapters/it/chapter1/8.mdx

diff --git a/chapters/it/chapter1/8.mdx b/chapters/it/chapter1/8.mdx
deleted file mode 100644
index 90c80665d..000000000
--- a/chapters/it/chapter1/8.mdx
+++ /dev/null
@@ -1,32 +0,0 @@
-# Bias and limitations
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter1/section8.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter1/section8.ipynb"},
-]} />
-
-If your intent is to use a pretrained model or a fine-tuned version in production, please be aware that, while these models are powerful tools, they come with limitations. The biggest of these is that, to enable pretraining on large amounts of data, researchers often scrape all the content they can find, taking the best as well as the worst of what is available on the internet. 
-
-To give a quick illustration, let's go back the example of a `fill-mask` pipeline with the BERT model:
-
-```python
-from transformers import pipeline
-
-unmasker = pipeline("fill-mask", model="bert-base-uncased")
-result = unmasker("This man works as a [MASK].")
-print([r["token_str"] for r in result])
-
-result = unmasker("This woman works as a [MASK].")
-print([r["token_str"] for r in result])
-```
-
-```python out
-['lawyer', 'carpenter', 'doctor', 'waiter', 'mechanic']
-['nurse', 'waitress', 'teacher', 'maid', 'prostitute']
-```
-
-When asked to fill in the missing word in these two sentences, the model gives only one gender-free answer (waiter/waitress). The others are work occupations usually associated with one specific gender -- and yes, prostitute ended up in the top 5 possibilities the model associates with "woman" and "work." This happens even though BERT is one of the rare Transformer models not built by scraping data from all over the internet, but rather using apparently neutral data (it's trained on the [English Wikipedia](https://huggingface.co/datasets/wikipedia) and [BookCorpus](https://huggingface.co/datasets/bookcorpus) datasets). 
-
-When you use these tools, you therefore need to keep in the back of your mind that the original model you are using could very easily generate sexist, racist, or homophobic content. Fine-tuning the model on your data won't make this intrinsic bias disappear.

From 87b4cda8339554a7636d19710f8cb9d87f426136 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 26 Apr 2022 20:39:08 +0100
Subject: [PATCH 048/127] Delete 7.mdx

---
 chapters/it/chapter1/7.mdx | 16 ----------------
 1 file changed, 16 deletions(-)
 delete mode 100644 chapters/it/chapter1/7.mdx

diff --git a/chapters/it/chapter1/7.mdx b/chapters/it/chapter1/7.mdx
deleted file mode 100644
index 3639c2a81..000000000
--- a/chapters/it/chapter1/7.mdx
+++ /dev/null
@@ -1,16 +0,0 @@
-# Sequence-to-sequence models
-
-<Youtube id="0_4KEb08xrE" />
-
-Encoder-decoder models (also called *sequence-to-sequence models*) use both parts of the Transformer architecture. At each stage, the attention layers of the encoder can access all the words in the initial sentence, whereas the attention layers of the decoder can only access the words positioned before a given word in the input.
-
-The pretraining of these models can be done using the objectives of encoder or decoder models, but usually involves something a bit more complex. For instance, [T5](https://huggingface.co/t5-base) is pretrained by replacing random spans of text (that can contain several words) with a single mask special word, and the objective is then to predict the text that this mask word replaces.
-
-Sequence-to-sequence models are best suited for tasks revolving around generating new sentences depending on a given input, such as summarization, translation, or generative question answering.
-
-Representatives of this family of models include:
-
-- [BART](https://huggingface.co/transformers/model_doc/bart.html)
-- [mBART](https://huggingface.co/transformers/model_doc/mbart.html)
-- [Marian](https://huggingface.co/transformers/model_doc/marian.html)
-- [T5](https://huggingface.co/transformers/model_doc/t5.html)

From 7c7a730aedc01587b7d50a050d8078db2d3cbb6d Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 26 Apr 2022 20:39:17 +0100
Subject: [PATCH 049/127] Delete 6.mdx

---
 chapters/it/chapter1/6.mdx | 16 ----------------
 1 file changed, 16 deletions(-)
 delete mode 100644 chapters/it/chapter1/6.mdx

diff --git a/chapters/it/chapter1/6.mdx b/chapters/it/chapter1/6.mdx
deleted file mode 100644
index 87ad85ec3..000000000
--- a/chapters/it/chapter1/6.mdx
+++ /dev/null
@@ -1,16 +0,0 @@
-# Decoder models
-
-<Youtube id="d_ixlCubqQw" />
-
-Decoder models use only the decoder of a Transformer model. At each stage, for a given word the attention layers can only access the words positioned before it in the sentence. These models are often called *auto-regressive models*.
-
-The pretraining of decoder models usually revolves around predicting the next word in the sentence.
-
-These models are best suited for tasks involving text generation.
-
-Representatives of this family of models include:
-
-- [CTRL](https://huggingface.co/transformers/model_doc/ctrl.html)
-- [GPT](https://huggingface.co/transformers/model_doc/gpt.html)
-- [GPT-2](https://huggingface.co/transformers/model_doc/gpt2.html)
-- [Transformer XL](https://huggingface.co/transformers/model_doc/transformerxl.html)

From 260aec7b06983976d655e97eaf99e5d61d91923c Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 26 Apr 2022 20:39:28 +0100
Subject: [PATCH 050/127] Delete 5.mdx

---
 chapters/it/chapter1/5.mdx | 17 -----------------
 1 file changed, 17 deletions(-)
 delete mode 100644 chapters/it/chapter1/5.mdx

diff --git a/chapters/it/chapter1/5.mdx b/chapters/it/chapter1/5.mdx
deleted file mode 100644
index 1c707033b..000000000
--- a/chapters/it/chapter1/5.mdx
+++ /dev/null
@@ -1,17 +0,0 @@
-# Encoder models
-
-<Youtube id="MUqNwgPjJvQ" />
-
-Encoder models use only the encoder of a Transformer model. At each stage, the attention layers can access all the words in the initial sentence. These models are often characterized as having "bi-directional" attention, and are often called *auto-encoding models*.
-
-The pretraining of these models usually revolves around somehow corrupting a given sentence (for instance, by masking random words in it) and tasking the model with finding or reconstructing the initial sentence.
-
-Encoder models are best suited for tasks requiring an understanding of the full sentence, such as sentence classification, named entity recognition (and more generally word classification), and extractive question answering.
-
-Representatives of this family of models include:
-
-- [ALBERT](https://huggingface.co/transformers/model_doc/albert.html)
-- [BERT](https://huggingface.co/transformers/model_doc/bert.html)
-- [DistilBERT](https://huggingface.co/transformers/model_doc/distilbert.html)
-- [ELECTRA](https://huggingface.co/transformers/model_doc/electra.html)
-- [RoBERTa](https://huggingface.co/transformers/model_doc/roberta.html)

From e78997e7f41034e244cdb016a18bcf2264ba6f3a Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 26 Apr 2022 20:39:39 +0100
Subject: [PATCH 051/127] Delete 4.mdx

---
 chapters/it/chapter1/4.mdx | 171 -------------------------------------
 1 file changed, 171 deletions(-)
 delete mode 100644 chapters/it/chapter1/4.mdx

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
deleted file mode 100644
index 255d37f6e..000000000
--- a/chapters/it/chapter1/4.mdx
+++ /dev/null
@@ -1,171 +0,0 @@
-# How do Transformers work?
-
-In this section, we will take a high-level look at the architecture of Transformer models.
-
-## A bit of Transformer history
-
-Here are some reference points in the (short) history of Transformer models:
-
-<div class="flex justify-center">
-<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers_chrono.svg" alt="A brief chronology of Transformers models.">
-<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers_chrono-dark.svg" alt="A brief chronology of Transformers models.">
-</div>
-
-The [Transformer architecture](https://arxiv.org/abs/1706.03762) was introduced in June 2017. The focus of the original research was on translation tasks. This was followed by the introduction of several influential models, including:
-
-- **June 2018**: [GPT](https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf), the first pretrained Transformer model, used for fine-tuning on various NLP tasks and obtained state-of-the-art results
-
-- **October 2018**: [BERT](https://arxiv.org/abs/1810.04805), another large pretrained model, this one designed to produce better summaries of sentences (more on this in the next chapter!)
-
-- **February 2019**: [GPT-2](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf), an improved (and bigger) version of GPT that was not immediately publicly released due to ethical concerns
-
-- **October 2019**: [DistilBERT](https://arxiv.org/abs/1910.01108), a distilled version of BERT that is 60% faster, 40% lighter in memory, and still retains 97% of BERT's performance
-
-- **October 2019**: [BART](https://arxiv.org/abs/1910.13461) and [T5](https://arxiv.org/abs/1910.10683), two large pretrained models using the same architecture as the original Transformer model (the first to do so)
-
-- **May 2020**, [GPT-3](https://arxiv.org/abs/2005.14165), an even bigger version of GPT-2 that is able to perform well on a variety of tasks without the need for fine-tuning (called _zero-shot learning_)
-
-This list is far from comprehensive, and is just meant to highlight a few of the different kinds of Transformer models. Broadly, they can be grouped into three categories:
-
-- GPT-like (also called _auto-regressive_ Transformer models)
-- BERT-like (also called _auto-encoding_ Transformer models) 
-- BART/T5-like (also called _sequence-to-sequence_ Transformer models)
-
-We will dive into these families in more depth later on.
-
-## Transformers are language models
-
-All the Transformer models mentioned above (GPT, BERT, BART, T5, etc.) have been trained as *language models*. This means they have been trained on large amounts of raw text in a self-supervised fashion. Self-supervised learning is a type of training in which the objective is automatically computed from the inputs of the model. That means that humans are not needed to label the data!
-
-This type of model develops a statistical understanding of the language it has been trained on, but it's not very useful for specific practical tasks. Because of this, the general pretrained model then goes through a process called *transfer learning*. During this process, the model is fine-tuned in a supervised way -- that is, using human-annotated labels -- on a given task.
-
-An example of a task is predicting the next word in a sentence having read the *n* previous words. This is called *causal language modeling* because the output depends on the past and present inputs, but not the future ones.
-
-<div class="flex justify-center">
-<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/causal_modeling.svg" alt="Example of causal language modeling in which the next word from a sentence is predicted.">
-<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/causal_modeling-dark.svg" alt="Example of causal language modeling in which the next word from a sentence is predicted.">
-</div>
-
-Another example is *masked language modeling*, in which the model predicts a masked word in the sentence.
-
-<div class="flex justify-center">
-<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/masked_modeling.svg" alt="Example of masked language modeling in which a masked word from a sentence is predicted.">
-<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/masked_modeling-dark.svg" alt="Example of masked language modeling in which a masked word from a sentence is predicted.">
-</div>
-
-## Transformers are big models
-
-Apart from a few outliers (like DistilBERT), the general strategy to achieve better performance is by increasing the models' sizes as well as the amount of data they are pretrained on.
-
-<div class="flex justify-center">
-<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/model_parameters.png" alt="Number of parameters of recent Transformers models" width="90%">
-</div>
-
-Unfortunately, training a model, especially a large one, requires a large amount of data. This becomes very costly in terms of time and compute resources. It even translates to environmental impact, as can be seen in the following graph.
-
-<div class="flex justify-center">
-<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/carbon_footprint.svg" alt="The carbon footprint of a large language model.">
-<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/carbon_footprint-dark.svg" alt="The carbon footprint of a large language model.">
-</div>
-
-<Youtube id="ftWlj4FBHTg"/>
-
-And this is showing a project for a (very big) model led by a team consciously trying to reduce the environmental impact of pretraining. The footprint of running lots of trials to get the best hyperparameters would be even higher.
-
-Imagine if each time a research team, a student organization, or a company wanted to train a model, it did so from scratch. This would lead to huge, unnecessary global costs!
-
-This is why sharing language models is paramount: sharing the trained weights and building on top of already trained weights reduces the overall compute cost and carbon footprint of the community.
-
-
-## Transfer Learning
-
-<Youtube id="BqqfQnyjmgg" />
-
-*Pretraining* is the act of training a model from scratch: the weights are randomly initialized, and the training starts without any prior knowledge.
-
-<div class="flex justify-center">
-<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/pretraining.svg" alt="The pretraining of a language model is costly in both time and money.">
-<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/pretraining-dark.svg" alt="The pretraining of a language model is costly in both time and money.">
-</div>
-
-This pretraining is usually done on very large amounts of data. Therefore, it requires a very large corpus of data, and training can take up to several weeks.
-
-*Fine-tuning*, on the other hand, is the training done **after** a model has been pretrained. To perform fine-tuning, you first acquire a pretrained language model, then perform additional training with a dataset specific to your task. Wait -- why not simply train directly for the final task? There are a couple of reasons:
-
-*  The pretrained model was already trained on a dataset that has some similarities with the fine-tuning dataset. The fine-tuning process is thus able to take advantage of knowledge acquired by the initial model during pretraining (for instance, with NLP problems, the pretrained model will have some kind of statistical understanding of the language you are using for your task). 
-*  Since the pretrained model was already trained on lots of data, the fine-tuning requires way less data to get decent results.
-*  For the same reason, the amount of time and resources needed to get good results are much lower.
-
-For example, one could leverage a pretrained model trained on the English language and then fine-tune it on an arXiv corpus, resulting in a science/research-based model. The fine-tuning will only require a limited amount of data: the knowledge the pretrained model has acquired is "transferred," hence the term *transfer learning*.
-
-<div class="flex justify-center">
-<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/finetuning.svg" alt="The fine-tuning of a language model is cheaper than pretraining in both time and money.">
-<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/finetuning-dark.svg" alt="The fine-tuning of a language model is cheaper than pretraining in both time and money.">
-</div>
-
-Fine-tuning a model therefore has lower time, data, financial, and environmental costs. It is also quicker and easier to iterate over different fine-tuning schemes, as the training is less constraining than a full pretraining.
-
-This process will also achieve better results than training from scratch (unless you have lots of data), which is why you should always try to leverage a pretrained model -- one as close as possible to the task you have at hand -- and fine-tune it.
-
-## General architecture
-
-In this section, we'll go over the general architecture of the Transformer model. Don't worry if you don't understand some of the concepts; there are detailed sections later covering each of the components.
-
-<Youtube id="H39Z_720T5s" />
-
-## Introduction
-
-The model is primarily composed of two blocks:
-
-* **Encoder (left)**: The encoder receives an input and builds a representation of it (its features). This means that the model is optimized to acquire understanding from the input.
-* **Decoder (right)**: The decoder uses the encoder's representation (features) along with other inputs to generate a target sequence. This means that the model is optimized for generating outputs.
-
-<div class="flex justify-center">
-<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers_blocks.svg" alt="Architecture of a Transformers models">
-<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers_blocks-dark.svg" alt="Architecture of a Transformers models">
-</div>
-
-Each of these parts can be used independently, depending on the task: 
-
-* **Encoder-only models**: Good for tasks that require understanding of the input, such as sentence classification and named entity recognition.
-* **Decoder-only models**: Good for generative tasks such as text generation.
-* **Encoder-decoder models** or **sequence-to-sequence models**: Good for generative tasks that require an input, such as translation or summarization.
-
-We will dive into those architectures independently in later sections.
-
-## Attention layers
-
-A key feature of Transformer models is that they are built with special layers called *attention layers*. In fact, the title of the paper introducing the Transformer architecture was ["Attention Is All You Need"](https://arxiv.org/abs/1706.03762)! We will explore the details of attention layers later in the course; for now, all you need to know is that this layer will tell the model to pay specific attention to certain words in the sentence you passed it (and more or less ignore the others) when dealing with the representation of each word.
-
-To put this into context, consider the task of translating text from English to French. Given the input "You like this course", a translation model will need to also attend to the adjacent word "You" to get the proper translation for the word "like", because in French the verb "like" is conjugated differently depending on the subject. The rest of the sentence, however, is not useful for the translation of that word. In the same vein, when translating "this" the model will also need to pay attention to the word "course", because "this" translates differently depending on whether the associated noun is masculine or feminine. Again, the other words in the sentence will not matter for the translation of "this". With more complex sentences (and more complex grammar rules), the model would need to pay special attention to words that might appear farther away in the sentence to properly translate each word.
-
-The same concept applies to any task associated with natural language: a word by itself has a meaning, but that meaning is deeply affected by the context, which can be any other word (or words) before or after the word being studied.
-
-Now that you have an idea of what attention layers are all about, let's take a closer look at the Transformer architecture.
-
-## The original architecture
-
-The Transformer architecture was originally designed for translation. During training, the encoder receives inputs (sentences) in a certain language, while the decoder receives the same sentences in the desired target language. In the encoder, the attention layers can use all the words in a sentence (since, as we just saw, the translation of a given word can be dependent on what is after as well as before it in the sentence). The decoder, however, works sequentially and can only pay attention to the words in the sentence that it has already translated (so, only the words before the word currently being generated). For example, when we have predicted the first three words of the translated target, we give them to the decoder  which then uses all the inputs of the encoder to try to predict the fourth word.
-
-To speed things up during training (when the model has access to target sentences), the decoder is fed the whole target, but it is not allowed to use future words (if it had access to the word at position 2 when trying to predict the word at position 2, the problem would not be very hard!). For instance, when trying to predict the fourth word, the attention layer will only have access to the words in positions 1 to 3.
-
-The original Transformer architecture looked like this, with the encoder on the left and the decoder on the right:
-
-<div class="flex justify-center">
-<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers.svg" alt="Architecture of a Transformers models">
-<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers-dark.svg" alt="Architecture of a Transformers models">
-</div>
-
-Note that the the first attention layer in a decoder block pays attention to all (past) inputs to the decoder, but the second attention layer uses the output of the encoder. It can thus access the whole input sentence to best predict the current word. This is very useful as different languages can have grammatical rules that put the words in different orders, or some context provided later in the sentence may be helpful to determine the best translation of a given word.
-
-The *attention mask* can also be used in the encoder/decoder to prevent the model from paying attention to some special words -- for instance, the special padding word used to make all the inputs the same length when batching together sentences.
-
-##  Architectures vs. checkpoints
-
-As we dive into Transformer models in this course, you'll see mentions of *architectures* and *checkpoints* as well as *models*. These terms all have slightly different meanings: 
-
-* **Architecture**: This is the skeleton of the model -- the definition of each layer and each operation that happens within the model. 
-* **Checkpoints**: These are the weights that will be loaded in a given architecture.
-* **Model**: This is an umbrella term that isn't as precise as "architecture" or "checkpoint": it can mean both. This course will specify *architecture* or *checkpoint* when it matters to reduce ambiguity.
-
-For example, BERT is an architecture while `bert-base-cased`, a set of weights trained by the Google team for the first release of BERT, is a checkpoint. However, one can say "the BERT model" and "the `bert-base-cased` model."

From 5298cbca81fe951ad67e4f5f0d285cd18682d655 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 26 Apr 2022 20:39:51 +0100
Subject: [PATCH 052/127] Delete 3.mdx

---
 chapters/it/chapter1/3.mdx | 329 -------------------------------------
 1 file changed, 329 deletions(-)
 delete mode 100644 chapters/it/chapter1/3.mdx

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
deleted file mode 100644
index ac22e7e8f..000000000
--- a/chapters/it/chapter1/3.mdx
+++ /dev/null
@@ -1,329 +0,0 @@
-# Transformers, what can they do?
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter1/section3.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter1/section3.ipynb"},
-]} />
-
-In this section, we will look at what Transformer models can do and use our first tool from the 🤗 Transformers library: the `pipeline()` function.
-
-<Tip>
-👀 See that <em>Open in Colab</em> button on the top right? Click on it to open a Google Colab notebook with all the code samples of this section. This button will be present in any section containing code examples. 
-
-If you want to run the examples locally, we recommend taking a look at the <a href="/course/chapter0">setup</a>.
-</Tip>
-
-## Transformers are everywhere!
-
-Transformer models are used to solve all kinds of NLP tasks, like the ones mentioned in the previous section. Here are some of the companies and organizations using Hugging Face and Transformer models, who also contribute back to the community by sharing their models:
-
-<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/companies.PNG" alt="Companies using Hugging Face" width="100%">
-
-The [🤗 Transformers library](https://github.com/huggingface/transformers) provides the functionality to create and use those shared models. The [Model Hub](https://huggingface.co/models) contains thousands of pretrained models that anyone can download and use. You can also upload your own models to the Hub!
-
-<Tip>
-⚠️ The Hugging Face Hub is not limited to Transformer models. Anyone can share any kind of models or datasets they want! <a href="https://huggingface.co/join">Create a huggingface.co</a> account to benefit from all available features!
-</Tip>
-
-Before diving into how Transformer models work under the hood, let's look at a few examples of how they can be used to solve some interesting NLP problems.
-
-## Working with pipelines
-
-<Youtube id="tiZFewofSLM" />
-
-The most basic object in the 🤗 Transformers library is the `pipeline()` function. It connects a model with its necessary preprocessing and postprocessing steps, allowing us to directly input any text and get an intelligible answer:
-
-```python
-from transformers import pipeline
-
-classifier = pipeline("sentiment-analysis")
-classifier("I've been waiting for a HuggingFace course my whole life.")
-```
-
-```python out
-[{'label': 'POSITIVE', 'score': 0.9598047137260437}]
-```
-
-We can even pass several sentences!
-
-```python
-classifier(
-    ["I've been waiting for a HuggingFace course my whole life.", "I hate this so much!"]
-)
-```
-
-```python out
-[{'label': 'POSITIVE', 'score': 0.9598047137260437},
- {'label': 'NEGATIVE', 'score': 0.9994558095932007}]
-```
-
-By default, this pipeline selects a particular pretrained model that has been fine-tuned for sentiment analysis in English. The model is downloaded and cached when you create the `classifier` object. If you rerun the command, the cached model will be used instead and there is no need to download the model again.
-
-There are three main steps involved when you pass some text to a pipeline:
-
-1. The text is preprocessed into a format the model can understand.
-2. The preprocessed inputs are passed to the model.
-3. The predictions of the model are post-processed, so you can make sense of them.
-
-
-Some of the currently [available pipelines](https://huggingface.co/transformers/main_classes/pipelines.html) are:
-
-- `feature-extraction` (get the vector representation of a text)
-- `fill-mask`
-- `ner` (named entity recognition)
-- `question-answering`
-- `sentiment-analysis`
-- `summarization`
-- `text-generation`
-- `translation`
-- `zero-shot-classification`
-
-Let's have a look at a few of these!
-
-## Zero-shot classification
-
-We'll start by tackling a more challenging task where we need to classify texts that haven't been labelled. This is a common scenario in real-world projects because annotating text is usually time-consuming and requires domain expertise. For this use case, the `zero-shot-classification` pipeline is very powerful: it allows you to specify which labels to use for the classification, so you don't have to rely on the labels of the pretrained model. You've already seen how the model can classify a sentence as positive or negative using those two labels — but it can also classify the text using any other set of labels you like.
-
-```python
-from transformers import pipeline
-
-classifier = pipeline("zero-shot-classification")
-classifier(
-    "This is a course about the Transformers library",
-    candidate_labels=["education", "politics", "business"],
-)
-```
-
-```python out
-{'sequence': 'This is a course about the Transformers library',
- 'labels': ['education', 'business', 'politics'],
- 'scores': [0.8445963859558105, 0.111976258456707, 0.043427448719739914]}
-```
-
-This pipeline is called _zero-shot_ because you don't need to fine-tune the model on your data to use it. It can directly return probability scores for any list of labels you want!
-
-<Tip>
-
-✏️ **Try it out!** Play around with your own sequences and labels and see how the model behaves.
-
-</Tip>
-
-
-## Text generation
-
-Now let's see how to use a pipeline to generate some text. The main idea here is that you provide a prompt and the model will auto-complete it by generating the remaining text. This is similar to the predictive text feature that is found on many phones. Text generation involves randomness, so it's normal if you don't get the same results as shown below.
-
-```python
-from transformers import pipeline
-
-generator = pipeline("text-generation")
-generator("In this course, we will teach you how to")
-```
-
-```python out
-[{'generated_text': 'In this course, we will teach you how to understand and use '
-                    'data flow and data interchange when handling user data. We '
-                    'will be working with one or more of the most commonly used '
-                    'data flows — data flows of various types, as seen by the '
-                    'HTTP'}]
-```
-
-You can control how many different sequences are generated with the argument `num_return_sequences` and the total length of the output text with the argument `max_length`.
-
-<Tip>
-
-✏️ **Try it out!** Use the `num_return_sequences` and `max_length` arguments to generate two sentences of 15 words each.
-
-</Tip>
-
-
-## Using any model from the Hub in a pipeline
-
-The previous examples used the default model for the task at hand, but you can also choose a particular model from the Hub to use in a pipeline for a specific task — say, text generation. Go to the [Model Hub](https://huggingface.co/models) and click on the corresponding tag on the left to display only the supported models for that task. You should get to a page like [this one](https://huggingface.co/models?pipeline_tag=text-generation).
-
-Let's try the [`distilgpt2`](https://huggingface.co/distilgpt2) model! Here's how to load it in the same pipeline as before:
-
-```python
-from transformers import pipeline
-
-generator = pipeline("text-generation", model="distilgpt2")
-generator(
-    "In this course, we will teach you how to",
-    max_length=30,
-    num_return_sequences=2,
-)
-```
-
-```python out
-[{'generated_text': 'In this course, we will teach you how to manipulate the world and '
-                    'move your mental and physical capabilities to your advantage.'},
- {'generated_text': 'In this course, we will teach you how to become an expert and '
-                    'practice realtime, and with a hands on experience on both real '
-                    'time and real'}]
-```
-
-You can refine your search for a model by clicking on the language tags, and pick a model that will generate text in another language. The Model Hub even contains checkpoints for multilingual models that support several languages.
-
-Once you select a model by clicking on it, you'll see that there is a widget enabling you to try it directly online. This way you can quickly test the model's capabilities before downloading it.
-
-<Tip>
-
-✏️ **Try it out!** Use the filters to find a text generation model for another language. Feel free to play with the widget and use it in a pipeline!
-
-</Tip>
-
-### The Inference API
-
-All the models can be tested directly through your browser using the Inference API, which is available on the Hugging Face [website](https://huggingface.co/). You can play with the model directly on this page by inputting custom text and watching the model process the input data.
-
-The Inference API that powers the widget is also available as a paid product, which comes in handy if you need it for your workflows. See the [pricing page](https://huggingface.co/pricing) for more details.
-
-## Mask filling
-
-The next pipeline you'll try is `fill-mask`. The idea of this task is to fill in the blanks in a given text:
-
-```python
-from transformers import pipeline
-
-unmasker = pipeline("fill-mask")
-unmasker("This course will teach you all about <mask> models.", top_k=2)
-```
-
-```python out
-[{'sequence': 'This course will teach you all about mathematical models.',
-  'score': 0.19619831442832947,
-  'token': 30412,
-  'token_str': ' mathematical'},
- {'sequence': 'This course will teach you all about computational models.',
-  'score': 0.04052725434303284,
-  'token': 38163,
-  'token_str': ' computational'}]
-```
-
-The `top_k` argument controls how many possibilities you want to be displayed. Note that here the model fills in the special `<mask>` word, which is often referred to as a *mask token*. Other mask-filling models might have different mask tokens, so it's always good to verify the proper mask word when exploring other models. One way to check it is by looking at the mask word used in the widget.
-
-<Tip>
-
-✏️ **Try it out!** Search for the `bert-base-cased` model on the Hub and identify its mask word in the Inference API widget. What does this model predict for the sentence in our `pipeline` example above?
-
-</Tip>
-
-## Named entity recognition
-
-Named entity recognition (NER) is a task where the model has to find which parts of the input text correspond to entities such as persons, locations, or organizations. Let's look at an example:
-
-```python
-from transformers import pipeline
-
-ner = pipeline("ner", grouped_entities=True)
-ner("My name is Sylvain and I work at Hugging Face in Brooklyn.")
-```
-
-```python out
-[{'entity_group': 'PER', 'score': 0.99816, 'word': 'Sylvain', 'start': 11, 'end': 18}, 
- {'entity_group': 'ORG', 'score': 0.97960, 'word': 'Hugging Face', 'start': 33, 'end': 45}, 
- {'entity_group': 'LOC', 'score': 0.99321, 'word': 'Brooklyn', 'start': 49, 'end': 57}
-]
-```
-
-Here the model correctly identified that Sylvain is a person (PER), Hugging Face an organization (ORG), and Brooklyn a location (LOC).
-
-We pass the option `grouped_entities=True` in the pipeline creation function to tell the pipeline to regroup together the parts of the sentence that correspond to the same entity: here the model correctly grouped "Hugging" and "Face" as a single organization, even though the name consists of multiple words. In fact, as we will see in the next chapter, the preprocessing even splits some words into smaller parts. For instance, `Sylvain` is split into four pieces: `S`, `##yl`, `##va`, and `##in`. In the post-processing step, the pipeline successfully regrouped those pieces.
-
-<Tip>
-
-✏️ **Try it out!** Search the Model Hub for a model able to do part-of-speech tagging (usually abbreviated as POS) in English. What does this model predict for the sentence in the example above?
-
-</Tip>
-
-## Question answering
-
-The `question-answering` pipeline answers questions using information from a given context:
-
-```python
-from transformers import pipeline
-
-question_answerer = pipeline("question-answering")
-question_answerer(
-    question="Where do I work?",
-    context="My name is Sylvain and I work at Hugging Face in Brooklyn",
-)
-```
-
-```python out
-{'score': 0.6385916471481323, 'start': 33, 'end': 45, 'answer': 'Hugging Face'}
-```
-
-Note that this pipeline works by extracting information from the provided context; it does not generate the answer.
-
-## Summarization
-
-Summarization is the task of reducing a text into a shorter text while keeping all (or most) of the important aspects referenced in the text. Here's an example:
-
-```python
-from transformers import pipeline
-
-summarizer = pipeline("summarization")
-summarizer(
-    """
-    America has changed dramatically during recent years. Not only has the number of 
-    graduates in traditional engineering disciplines such as mechanical, civil, 
-    electrical, chemical, and aeronautical engineering declined, but in most of 
-    the premier American universities engineering curricula now concentrate on 
-    and encourage largely the study of engineering science. As a result, there 
-    are declining offerings in engineering subjects dealing with infrastructure, 
-    the environment, and related issues, and greater concentration on high 
-    technology subjects, largely supporting increasingly complex scientific 
-    developments. While the latter is important, it should not be at the expense 
-    of more traditional engineering.
-
-    Rapidly developing economies such as China and India, as well as other 
-    industrial countries in Europe and Asia, continue to encourage and advance 
-    the teaching of engineering. Both China and India, respectively, graduate 
-    six and eight times as many traditional engineers as does the United States. 
-    Other industrial countries at minimum maintain their output, while America 
-    suffers an increasingly serious decline in the number of engineering graduates 
-    and a lack of well-educated engineers.
-"""
-)
-```
-
-```python out
-[{'summary_text': ' America has changed dramatically during recent years . The '
-                  'number of engineering graduates in the U.S. has declined in '
-                  'traditional engineering disciplines such as mechanical, civil '
-                  ', electrical, chemical, and aeronautical engineering . Rapidly '
-                  'developing economies such as China and India, as well as other '
-                  'industrial countries in Europe and Asia, continue to encourage '
-                  'and advance engineering .'}]
-```
-
-Like with text generation, you can specify a `max_length` or a `min_length` for the result.
-
-
-## Translation
-
-For translation, you can use a default model if you provide a language pair in the task name (such as `"translation_en_to_fr"`), but the easiest way is to pick the model you want to use on the [Model Hub](https://huggingface.co/models). Here we'll try translating from French to English:
-
-```python
-from transformers import pipeline
-
-translator = pipeline("translation", model="Helsinki-NLP/opus-mt-fr-en")
-translator("Ce cours est produit par Hugging Face.")
-```
-
-```python out
-[{'translation_text': 'This course is produced by Hugging Face.'}]
-```
-
-Like with text generation and summarization, you can specify a `max_length` or a `min_length` for the result.
-
-<Tip>
-
-✏️ **Try it out!** Search for translation models in other languages and try to translate the previous sentence into a few different languages.
-
-</Tip>
-
-The pipelines shown so far are mostly for demonstrative purposes. They were programmed for specific tasks and cannot perform variations of them. In the next chapter, you'll learn what's inside a `pipeline()` function and how to customize its behavior.

From 7195a26bc160daaa7d90f962151b4f9f743f9268 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 26 Apr 2022 20:39:59 +0100
Subject: [PATCH 053/127] Delete 10.mdx

---
 chapters/it/chapter1/10.mdx | 254 ------------------------------------
 1 file changed, 254 deletions(-)
 delete mode 100644 chapters/it/chapter1/10.mdx

diff --git a/chapters/it/chapter1/10.mdx b/chapters/it/chapter1/10.mdx
deleted file mode 100644
index 355cade7f..000000000
--- a/chapters/it/chapter1/10.mdx
+++ /dev/null
@@ -1,254 +0,0 @@
-<!-- DISABLE-FRONTMATTER-SECTIONS -->
-
-# End-of-chapter quiz
-
-This chapter covered a lot of ground! Don't worry if you didn't grasp all the details; the next chapters will help you understand how things work under the hood.
-
-First, though, let's test what you learned in this chapter!
-
-
-### 1. Explore the Hub and look for the `roberta-large-mnli` checkpoint. What task does it perform?
-
-
-<Question
-	choices={[
-		{
-			text: "Summarization",
-			explain: "Look again on the <a href=\"https://huggingface.co/roberta-large-mnli\">roberta-large-mnli page</a>."
-		},
-		{
-			text: "Text classification",
-			explain: "More precisely, it classifies if two sentences are logically linked across three labels (contradiction, neutral, entailment) — a task also called <em>natural language inference</em>.",
-			correct: true
-		},
-		{
-			text: "Text generation",
-			explain: "Look again on the <a href=\"https://huggingface.co/roberta-large-mnli\">roberta-large-mnli page</a>."
-		}
-	]}
-/>
-
-### 2. What will the following code return?
-
-```py
-from transformers import pipeline
-
-ner = pipeline("ner", grouped_entities=True)
-ner("My name is Sylvain and I work at Hugging Face in Brooklyn.")
-```
-
-<Question
-	choices={[
-		{
-			text: "It will return classification scores for this sentence, with labels \"positive\" or \"negative\".",
-			explain: "This is incorrect — this would be a <code>sentiment-analysis</code> pipeline."
-		},
-		{
-			text: "It will return a generated text completing this sentence.",
-			explain: "This is incorrect — it would be a <code>text-generation</code> pipeline.",
-		},
-		{
-			text: "It will return the words representing persons, organizations or locations.",
-			explain: "Furthermore, with <code>grouped_entities=True</code>, it will group together the words belonging to the same entity, like \"Hugging Face\".",
-			correct: true
-		}
-	]}
-/>
-
-### 3. What should replace ... in this code sample?
-
-```py
-from transformers import pipeline
-
-filler = pipeline("fill-mask", model="bert-base-cased")
-result = filler("...")
-```
-
-<Question
-	choices={[
-		{
-			text: "This &#60;mask> has been waiting for you.",
-			explain: "This is incorrect. Check out the <code>bert-base-cased</code> model card and try to spot your mistake."
-		},
-		{
-			text: "This [MASK] has been waiting for you.",
-			explain: "Correct! This model's mask token is [MASK].",
-			correct: true
-		},
-		{
-			text: "This man has been waiting for you.",
-			explain: "This is incorrect. This pipeline fills in masked words, so it needs a mask token somewhere."
-		}
-	]}
-/>
-
-### 4. Why will this code fail?
-
-```py
-from transformers import pipeline
-
-classifier = pipeline("zero-shot-classification")
-result = classifier("This is a course about the Transformers library")
-```
-
-<Question
-	choices={[
-		{
-			text: "This pipeline requires that labels be given to classify this text.",
-			explain: "Right — the correct code needs to include <code>candidate_labels=[...]</code>.",
-			correct: true
-		},
-		{
-			text: "This pipeline requires several sentences, not just one.",
-			explain: "This is incorrect, though when properly used, this pipeline can take a list of sentences to process (like all other pipelines)."
-		},
-		{
-			text: "The 🤗 Transformers library is broken, as usual.",
-			explain: "We won't dignify this answer with a comment!"
-		},
-		{
-			text: "This pipeline requires longer inputs; this one is too short.",
-			explain: "This is incorrect. Note that a very long text will be truncated when processed by this pipeline."
-		}
-	]}
-/>
-
-### 5. What does "transfer learning" mean?
-
-<Question
-	choices={[
-		{
-			text: "Transferring the knowledge of a pretrained model to a new model by training it on the same dataset.",
-			explain: "No, that would be two versions of the same model."
-		},
-		{
-			text: "Transferring the knowledge of a pretrained model to a new model by initializing the second model with the first model's weights.",
-			explain: "Correct: when the second model is trained on a new task, it *transfers* the knowledge of the first model.",
-			correct: true
-		},
-		{
-			text: "Transferring the knowledge of a pretrained model to a new model by building the second model with the same architecture as the first model.",
-			explain: "The architecture is just the way the model is built; there is no knowledge shared or transferred in this case."
-		}
-	]}
-/>
-
-### 6. True or false? A language model usually does not need labels for its pretraining.
-
-
-<Question
-	choices={[
-		{
-			text: "True",
-			explain: "The pretraining is usually <em>self-supervised</em>, which means the labels are created automatically from the inputs (like predicting the next word or filling in some masked words).",
-			correct: true
-		},
-		{
-			text: "False",
-			explain: "This is not the correct answer."
-		}
-	]}
-/>
-
-### 7. Select the sentence that best describes the terms "model," "architecture," and "weights."
-
-<Question
-	choices={[
-		{
-			text: "If a model is a building, its architecture is the blueprint and the weights are the people living inside.",
-			explain: "Following this metaphor, the weights would be the bricks and other materials used to construct the building."
-		},
-		{
-			text: "An architecture is a map to build a model and its weights are the cities represented on the map.",
-			explain: "The problem with this metaphor is that a map usually represents one existing reality (there is only one city in France named Paris). For a given architecture, multiple weights are possible."
-		},
-		{
-			text: "An architecture is a succession of mathematical functions to build a model and its weights are those functions' parameters.",
-			explain: "The same set of mathematical functions (architecture) can be used to build different models by using different parameters (weights).",
-			correct: true
-		}
-	]}
-/>
-
-
-### 8. Which of these types of models would you use for completing prompts with generated text?
-
-<Question
-	choices={[
-		{
-			text: "An encoder model",
-			explain: "An encoder model generates a representation of the whole sentence that is better suited for tasks like classification."
-		},
-		{
-			text: "A decoder model",
-			explain: "Decoder models are perfectly suited for text generation from a prompt.",
-			correct: true
-		},
-		{
-			text: "A sequence-to-sequence model",
-			explain: "Sequence-to-sequence models are better suited for tasks where you want to generate sentences in relation to the input sentences, not a given prompt."
-		}
-	]}
-/>
-
-### 9. Which of those types of models would you use for summarizing texts?
-
-<Question
-	choices={[
-		{
-			text: "An encoder model",
-			explain: "An encoder model generates a representation of the whole sentence that is better suited for tasks like classification."
-		},
-		{
-			text: "A decoder model",
-			explain: "Decoder models are good for generating output text (like summaries), but they don't have the ability to exploit a context like the whole text to summarize."
-		},
-		{
-			text: "A sequence-to-sequence model",
-			explain: "Sequence-to-sequence models are perfectly suited for a summarization task.",
-			correct: true
-		}
-	]}
-/>
-
-### 10. Which of these types of models would you use for classifying text inputs according to certain labels?
-
-<Question
-	choices={[
-		{
-			text: "An encoder model",
-			explain: "An encoder model generates a representation of the whole sentence which is perfectly suited for a task like classification.",
-			correct: true
-		},
-		{
-			text: "A decoder model",
-			explain: "Decoder models are good for generating output texts, not extracting a label out of a sentence."
-		},
-		{
-			text: "A sequence-to-sequence model",
-			explain: "Sequence-to-sequence models are better suited for tasks where you want to generate text based on an input sentence, not a label.",
-		}
-	]}
-/>
-
-### 11. What possible source can the bias observed in a model have?
-
-<Question
-	choices={[
-		{
-			text: "The model is a fine-tuned version of a pretrained model and it picked up its bias from it.",
-			explain: "When applying Transfer Learning, the bias in the pretrained model used persists in the fine-tuned model.",
-			correct: true
-		},
-		{
-			text: "The data the model was trained on is biased.",
-			explain: "This is the most obvious source of bias, but not the only one.",
-			correct: true
-		},
-		{
-			text: "The metric the model was optimizing for is biased.",
-			explain: "A less obvious source of bias is the way the model is trained. Your model will blindly optimize for whatever metric you chose, without any second thoughts.",
-			correct: true
-		}
-	]}
-/>

From c37b359d748961ea9ac0eeba05508c673c1a429f Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 26 Apr 2022 20:40:26 +0100
Subject: [PATCH 054/127] Delete chapters/it/chapter2 directory

---
 chapters/it/chapter2/1.mdx |  20 ---
 chapters/it/chapter2/2.mdx | 353 -------------------------------------
 chapters/it/chapter2/3.mdx | 228 ------------------------
 chapters/it/chapter2/4.mdx | 240 -------------------------
 chapters/it/chapter2/5.mdx | 338 -----------------------------------
 chapters/it/chapter2/6.mdx | 164 -----------------
 chapters/it/chapter2/7.mdx |  13 --
 chapters/it/chapter2/8.mdx | 305 --------------------------------
 chapters/it/chapter2/test  |   1 -
 9 files changed, 1662 deletions(-)
 delete mode 100644 chapters/it/chapter2/1.mdx
 delete mode 100644 chapters/it/chapter2/2.mdx
 delete mode 100644 chapters/it/chapter2/3.mdx
 delete mode 100644 chapters/it/chapter2/4.mdx
 delete mode 100644 chapters/it/chapter2/5.mdx
 delete mode 100644 chapters/it/chapter2/6.mdx
 delete mode 100644 chapters/it/chapter2/7.mdx
 delete mode 100644 chapters/it/chapter2/8.mdx
 delete mode 100644 chapters/it/chapter2/test

diff --git a/chapters/it/chapter2/1.mdx b/chapters/it/chapter2/1.mdx
deleted file mode 100644
index 9ab184b82..000000000
--- a/chapters/it/chapter2/1.mdx
+++ /dev/null
@@ -1,20 +0,0 @@
-# Introduction
-
-As you saw in [Chapter 1](/course/chapter1), Transformer models are usually very large. With millions to tens of *billions* of parameters, training and deploying these models is a complicated undertaking. Furthermore, with new models being released on a near-daily basis and each having its own implementation, trying them all out is no easy task.
-
-The 🤗 Transformers library was created to solve this problem. Its goal is to provide a single API through which any Transformer model can be loaded, trained, and saved. The library's main features are:
-
-- **Ease of use**: Downloading, loading, and using a state-of-the-art NLP model for inference can be done in just two lines of code.
-- **Flexibility**: At their core, all models are simple PyTorch `nn.Module` or TensorFlow `tf.keras.Model` classes and can be handled like any other models in their respective machine learning (ML) frameworks.
-- **Simplicity**: Hardly any abstractions are made across the library. The "All in one file" is a core concept: a model's forward pass is entirely defined in a single file, so that the code itself is understandable and hackable.
-
-This last feature makes 🤗 Transformers quite different from other ML libraries. The models are not built on modules 
-that are shared across files; instead, each model has its own layers. In addition to making the models more approachable and understandable, this allows you to easily experiment on one model without affecting others.
-
-This chapter will begin with an end-to-end example where we use a model and a tokenizer together to replicate the `pipeline()` function introduced in [Chapter 1](/course/chapter1). Next, we'll discuss the model API: we'll dive into the model and configuration classes, and show you how to load a model and how it processes numerical inputs to output predictions. 
-
-Then we'll look at the tokenizer API, which is the other main component of the `pipeline()` function. Tokenizers take care of the first and last processing steps, handling the conversion from text to numerical inputs for the neural network, and the conversion back to text when it is needed. Finally, we'll show you how to handle sending multiple sentences through a model in a prepared batch, then wrap it all up with a closer look at the high-level `tokenizer()` function.
-
-<Tip>
-⚠️ In order to benefit from all features available with the Model Hub and 🤗 Transformers, we recommend <a href="https://huggingface.co/join">creating an account</a>.
-</Tip>
\ No newline at end of file
diff --git a/chapters/it/chapter2/2.mdx b/chapters/it/chapter2/2.mdx
deleted file mode 100644
index a7715efc7..000000000
--- a/chapters/it/chapter2/2.mdx
+++ /dev/null
@@ -1,353 +0,0 @@
-<FrameworkSwitchCourse {fw} />
-
-# Behind the pipeline
-
-{#if fw === 'pt'}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section2_pt.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section2_pt.ipynb"},
-]} />
-
-{:else}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section2_tf.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section2_tf.ipynb"},
-]} />
-
-{/if}
-
-<Tip>
-This is the first section where the content is slightly different depending on whether you use PyTorch or TensorFlow. Toggle the switch on top of the title to select the platform you prefer!
-</Tip>
-
-{#if fw === 'pt'}
-<Youtube id="1pedAIvTWXk"/>
-{:else}
-<Youtube id="wVN12smEvqg"/>
-{/if}
-
-Let's start with a complete example, taking a look at what happened behind the scenes when we executed the following code in [Chapter 1](/course/chapter1):
-
-```python
-from transformers import pipeline
-
-classifier = pipeline("sentiment-analysis")
-classifier(
-    [
-        "I've been waiting for a HuggingFace course my whole life.",
-        "I hate this so much!",
-    ]
-)
-```
-
-and obtained:
-
-```python out
-[{'label': 'POSITIVE', 'score': 0.9598047137260437},
- {'label': 'NEGATIVE', 'score': 0.9994558095932007}]
-```
-
-As we saw in [Chapter 1](/course/chapter1), this pipeline groups together three steps: preprocessing, passing the inputs through the model, and postprocessing:
-
-<div class="flex justify-center">
-<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/full_nlp_pipeline.svg" alt="The full NLP pipeline: tokenization of text, conversion to IDs, and inference through the Transformer model and the model head."/>
-<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/full_nlp_pipeline-dark.svg" alt="The full NLP pipeline: tokenization of text, conversion to IDs, and inference through the Transformer model and the model head."/>
-</div>
-
-Let's quickly go over each of these.
-
-## Preprocessing with a tokenizer
-
-Like other neural networks, Transformer models can't process raw text directly, so the first step of our pipeline is to convert the text inputs into numbers that the model can make sense of. To do this we use a *tokenizer*, which will be responsible for:
-
-- Splitting the input into words, subwords, or symbols (like punctuation) that are called *tokens*
-- Mapping each token to an integer
-- Adding additional inputs that may be useful to the model
-
-All this preprocessing needs to be done in exactly the same way as when the model was pretrained, so we first need to download that information from the [Model Hub](https://huggingface.co/models). To do this, we use the `AutoTokenizer` class and its `from_pretrained()` method. Using the checkpoint name of our model, it will automatically fetch the data associated with the model's tokenizer and cache it (so it's only downloaded the first time you run the code below).
-
-Since the default checkpoint of the `sentiment-analysis` pipeline is `distilbert-base-uncased-finetuned-sst-2-english` (you can see its model card [here](https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)), we run the following:
-
-```python
-from transformers import AutoTokenizer
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-```
-
-Once we have the tokenizer, we can directly pass our sentences to it and we'll get back a dictionary that's ready to feed to our model! The only thing left to do is to convert the list of input IDs to tensors.
-
-You can use 🤗 Transformers without having to worry about which ML framework is used as a backend; it might be PyTorch or TensorFlow, or Flax for some models. However, Transformer models only accept *tensors* as input. If this is your first time hearing about tensors, you can think of them as NumPy arrays instead. A NumPy array can be a scalar (0D), a vector (1D), a matrix (2D), or have more dimensions. It's effectively a tensor; other ML frameworks' tensors behave similarly, and are usually as simple to instantiate as NumPy arrays.
-
-To specify the type of tensors we want to get back (PyTorch, TensorFlow, or plain NumPy), we use the `return_tensors` argument:
-
-{#if fw === 'pt'}
-```python
-raw_inputs = [
-    "I've been waiting for a HuggingFace course my whole life.",
-    "I hate this so much!",
-]
-inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="pt")
-print(inputs)
-```
-{:else}
-```python
-raw_inputs = [
-    "I've been waiting for a HuggingFace course my whole life.",
-    "I hate this so much!",
-]
-inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors="tf")
-print(inputs)
-```
-{/if}
-
-Don't worry about padding and truncation just yet; we'll explain those later. The main things to remember here are that you can pass one sentence or a list of sentences, and that you can specify the type of tensors you want to get back (if no type is passed, you will get a list of lists as a result).
-
-{#if fw === 'pt'}
-
-Here's what the results look like as PyTorch tensors:
-
-```python out
-{
-    'input_ids': tensor([
-        [  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172, 2607,  2026,  2878,  2166,  1012,   102],
-        [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,     0,     0,     0,     0,     0,     0]
-    ]), 
-    'attention_mask': tensor([
-        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
-        [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-    ])
-}
-```
-{:else}
-
-Here's what the results look like as TensorFlow tensors:
-
-```python out
-{
-    'input_ids': <tf.Tensor: shape=(2, 16), dtype=int32, numpy=
-        array([
-            [  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,  2607,  2026,  2878,  2166,  1012,   102],
-            [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,     0,     0,     0,     0,     0,     0]
-        ], dtype=int32)>, 
-    'attention_mask': <tf.Tensor: shape=(2, 16), dtype=int32, numpy=
-        array([
-            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
-            [1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-        ], dtype=int32)>
-}
-```
-{/if}
-
-The output itself is a dictionary containing two keys, `input_ids` and `attention_mask`. `input_ids` contains two rows of integers (one for each sentence) that are the unique identifiers of the tokens in each sentence. We'll explain what the `attention_mask` is later in this chapter. 
-
-## Going through the model
-
-{#if fw === 'pt'}
-We can download our pretrained model the same way we did with our tokenizer. 🤗 Transformers provides an `AutoModel` class which also has a `from_pretrained()` method:
-
-```python
-from transformers import AutoModel
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-model = AutoModel.from_pretrained(checkpoint)
-```
-{:else}
-We can download our pretrained model the same way we did with our tokenizer. 🤗 Transformers provides a `TFAutoModel` class which also has a `from_pretrained()` method:
-
-```python
-from transformers import TFAutoModel
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-model = TFAutoModel.from_pretrained(checkpoint)
-```
-{/if}
-
-In this code snippet, we have downloaded the same checkpoint we used in our pipeline before (it should actually have been cached already) and instantiated a model with it.
-
-This architecture contains only the base Transformer module: given some inputs, it outputs what we'll call *hidden states*, also known as *features*. For each model input, we'll retrieve a high-dimensional vector representing the **contextual understanding of that input by the Transformer model**.
-
-If this doesn't make sense, don't worry about it. We'll explain it all later.
-
-While these hidden states can be useful on their own, they're usually inputs to another part of the model, known as the *head*. In [Chapter 1](/course/chapter1), the different tasks could have been performed with the same architecture, but each of these tasks will have a different head associated with it.
-
-### A high-dimensional vector?
-
-The vector output by the Transformer module is usually large. It generally has three dimensions:
-
-- **Batch size**: The number of sequences processed at a time (2 in our example).
-- **Sequence length**: The length of the numerical representation of the sequence (16 in our example).
-- **Hidden size**: The vector dimension of each model input.
-
-It is said to be "high dimensional" because of the last value. The hidden size can be very large (768 is common for smaller models, and in larger models this can reach 3072 or more).
-
-We can see this if we feed the inputs we preprocessed to our model:
-
-{#if fw === 'pt'}
-```python
-outputs = model(**inputs)
-print(outputs.last_hidden_state.shape)
-```
-
-```python out
-torch.Size([2, 16, 768])
-```
-{:else}
-```py
-outputs = model(inputs)
-print(outputs.last_hidden_state.shape)
-```
-
-```python out
-(2, 16, 768)
-```
-{/if}
-
-Note that the outputs of 🤗 Transformers models behave like `namedtuple`s or dictionaries. You can access the elements by attributes (like we did) or by key (`outputs["last_hidden_state"]`), or even by index if you know exactly where the thing you are looking for is (`outputs[0]`).
-
-### Model heads: Making sense out of numbers
-
-The model heads take the high-dimensional vector of hidden states as input and project them onto a different dimension. They are usually composed of one or a few linear layers:
-
-<div class="flex justify-center">
-<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/transformer_and_head.svg" alt="A Transformer network alongside its head."/>
-<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/transformer_and_head-dark.svg" alt="A Transformer network alongside its head."/>
-</div>
-
-The output of the Transformer model is sent directly to the model head to be processed.
-
-In this diagram, the model is represented by its embeddings layer and the subsequent layers. The embeddings layer converts each input ID in the tokenized input into a vector that represents the associated token. The subsequent layers manipulate those vectors using the attention mechanism to produce the final representation of the sentences.
-
-There are many different architectures available in 🤗 Transformers, with each one designed around tackling a specific task. Here is a non-exhaustive list:
-
-- `*Model` (retrieve the hidden states)
-- `*ForCausalLM`
-- `*ForMaskedLM`
-- `*ForMultipleChoice`
-- `*ForQuestionAnswering`
-- `*ForSequenceClassification`
-- `*ForTokenClassification`
-- and others 🤗
-
-{#if fw === 'pt'}
-For our example, we will need a model with a sequence classification head (to be able to classify the sentences as positive or negative). So, we won't actually use the `AutoModel` class, but `AutoModelForSequenceClassification`:
-
-```python
-from transformers import AutoModelForSequenceClassification
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
-outputs = model(**inputs)
-```
-{:else}
-For our example, we will need a model with a sequence classification head (to be able to classify the sentences as positive or negative). So, we won't actually use the `TFAutoModel` class, but `TFAutoModelForSequenceClassification`:
-
-```python
-from transformers import TFAutoModelForSequenceClassification
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
-outputs = model(inputs)
-```
-{/if}
-
-Now if we look at the shape of our outputs, the dimensionality will be much lower: the model head takes as input the high-dimensional vectors we saw before, and outputs vectors containing two values (one per label):
-
-```python
-print(outputs.logits.shape)
-```
-
-{#if fw === 'pt'}
-```python out
-torch.Size([2, 2])
-```
-{:else}
-```python out
-(2, 2)
-```
-{/if}
-
-Since we have just two sentences and two labels, the result we get from our model is of shape 2 x 2.
-
-## Postprocessing the output
-
-The values we get as output from our model don't necessarily make sense by themselves. Let's take a look:
-
-```python
-print(outputs.logits)
-```
-
-{#if fw === 'pt'}
-```python out
-tensor([[-1.5607,  1.6123],
-        [ 4.1692, -3.3464]], grad_fn=<AddmmBackward>)
-```
-{:else}
-```python out
-<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
-    array([[-1.5606991,  1.6122842],
-           [ 4.169231 , -3.3464472]], dtype=float32)>
-```
-{/if}
-
-Our model predicted `[-1.5607, 1.6123]` for the first sentence and `[ 4.1692, -3.3464]` for the second one. Those are not probabilities but *logits*, the raw, unnormalized scores outputted by the last layer of the model. To be converted to probabilities, they need to go through a [SoftMax](https://en.wikipedia.org/wiki/Softmax_function) layer (all 🤗 Transformers models output the logits, as the loss function for training will generally fuse the last activation function, such as SoftMax, with the actual loss function, such as cross entropy):
-
-{#if fw === 'pt'}
-```py
-import torch
-
-predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
-print(predictions)
-```
-{:else}
-```py
-import tensorflow as tf
-
-predictions = tf.math.softmax(outputs.logits, axis=-1)
-print(predictions)
-```
-{/if}
-
-{#if fw === 'pt'}
-```python out
-tensor([[4.0195e-02, 9.5980e-01],
-        [9.9946e-01, 5.4418e-04]], grad_fn=<SoftmaxBackward>)
-```
-{:else}
-```python out
-tf.Tensor(
-[[4.01951671e-02 9.59804833e-01]
- [9.9945587e-01 5.4418424e-04]], shape=(2, 2), dtype=float32)
-```
-{/if}
-
-Now we can see that the model predicted `[0.0402, 0.9598]` for the first sentence and `[0.9995,  0.0005]` for the second one. These are recognizable probability scores.
-
-To get the labels corresponding to each position, we can inspect the `id2label` attribute of the model config (more on this in the next section):
-
-```python
-model.config.id2label
-```
-
-```python out
-{0: 'NEGATIVE', 1: 'POSITIVE'}
-```
-
-Now we can conclude that the model predicted the following:
- 
-- First sentence: NEGATIVE: 0.0402, POSITIVE: 0.9598
-- Second sentence: NEGATIVE: 0.9995, POSITIVE: 0.0005
-
-We have successfully reproduced the three steps of the pipeline: preprocessing with tokenizers, passing the inputs through the model, and postprocessing! Now let's take some time to dive deeper into each of those steps.
-
-<Tip>
-
-✏️ **Try it out!** Choose two (or more) texts of your own and run them through the `sentiment-analysis` pipeline. Then replicate the steps you saw here yourself and check that you obtain the same results!
-
-</Tip>
diff --git a/chapters/it/chapter2/3.mdx b/chapters/it/chapter2/3.mdx
deleted file mode 100644
index c9100c42c..000000000
--- a/chapters/it/chapter2/3.mdx
+++ /dev/null
@@ -1,228 +0,0 @@
-<FrameworkSwitchCourse {fw} />
-
-# Models
-
-{#if fw === 'pt'}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section3_pt.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section3_pt.ipynb"},
-]} />
-
-{:else}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section3_tf.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section3_tf.ipynb"},
-]} />
-
-{/if}
-
-{#if fw === 'pt'}
-<Youtube id="AhChOFRegn4"/>
-{:else}
-<Youtube id="d3JVgghSOew"/>
-{/if}
-
-{#if fw === 'pt'}
-In this section we'll take a closer look at creating and using a model. We'll use the `AutoModel` class, which is handy when you want to instantiate any model from a checkpoint.
-
-The `AutoModel` class and all of its relatives are actually simple wrappers over the wide variety of models available in the library. It's a clever wrapper as it can automatically guess the appropriate model architecture for your checkpoint, and then instantiates a model with this architecture.
-
-{:else}
-In this section we'll take a closer look at creating and using a model. We'll use the `TFAutoModel` class, which is handy when you want to instantiate any model from a checkpoint.
-
-The `TFAutoModel` class and all of its relatives are actually simple wrappers over the wide variety of models available in the library. It's a clever wrapper as it can automatically guess the appropriate model architecture for your checkpoint, and then instantiates a model with this architecture.
-
-{/if}
-
-However, if you know the type of model you want to use, you can use the class that defines its architecture directly. Let's take a look at how this works with a BERT model.
-
-## Creating a Transformer
-
-The first thing we'll need to do to initialize a BERT model is load a configuration object:
-
-{#if fw === 'pt'}
-```py
-from transformers import BertConfig, BertModel
-
-# Building the config
-config = BertConfig()
-
-# Building the model from the config
-model = BertModel(config)
-```
-{:else}
-```py
-from transformers import BertConfig, TFBertModel
-
-# Building the config
-config = BertConfig()
-
-# Building the model from the config
-model = TFBertModel(config)
-```
-{/if}
-
-The configuration contains many attributes that are used to build the model:
-
-```py
-print(config)
-```
-
-```python out
-BertConfig {
-  [...]
-  "hidden_size": 768,
-  "intermediate_size": 3072,
-  "max_position_embeddings": 512,
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  [...]
-}
-```
-
-While you haven't seen what all of these attributes do yet, you should recognize some of them: the `hidden_size` attribute defines the size of the `hidden_states` vector, and `num_hidden_layers` defines the number of layers the Transformer model has.
-
-### Different loading methods
-
-Creating a model from the default configuration initializes it with random values:
-
-{#if fw === 'pt'}
-```py
-from transformers import BertConfig, BertModel
-
-config = BertConfig()
-model = BertModel(config)
-
-# Model is randomly initialized!
-```
-{:else}
-```py
-from transformers import BertConfig, TFBertModel
-
-config = BertConfig()
-model = TFBertModel(config)
-
-# Model is randomly initialized!
-```
-{/if}
-
-The model can be used in this state, but it will output gibberish; it needs to be trained first. We could train the model from scratch on the task at hand, but as you saw in [Chapter 1](/course/chapter1), this would require a long time and a lot of data, and it would have a non-negligible environmental impact. To avoid unnecessary and duplicated effort, it's imperative to be able to share and reuse models that have already been trained.
-
-Loading a Transformer model that is already trained is simple — we can do this using the `from_pretrained()` method:
-
-{#if fw === 'pt'}
-```py
-from transformers import BertModel
-
-model = BertModel.from_pretrained("bert-base-cased")
-```
-
-As you saw earlier, we could replace `BertModel` with the equivalent `AutoModel` class. We'll do this from now on as this produces checkpoint-agnostic code; if your code works for one checkpoint, it should work seamlessly with another. This applies even if the architecture is different, as long as the checkpoint was trained for a similar task (for example, a sentiment analysis task).
-
-{:else}
-```py
-from transformers import TFBertModel
-
-model = TFBertModel.from_pretrained("bert-base-cased")
-```
-
-As you saw earlier, we could replace `TFBertModel` with the equivalent `TFAutoModel` class. We'll do this from now on as this produces checkpoint-agnostic code; if your code works for one checkpoint, it should work seamlessly with another. This applies even if the architecture is different, as long as the checkpoint was trained for a similar task (for example, a sentiment analysis task).
-
-{/if}
-
-In the code sample above we didn't use `BertConfig`, and instead loaded a pretrained model via the `bert-base-cased` identifier. This is a model checkpoint that was trained by the authors of BERT themselves; you can find more details about it in its [model card](https://huggingface.co/bert-base-cased).
-
-This model is now initialized with all the weights of the checkpoint. It can be used directly for inference on the tasks it was trained on, and it can also be fine-tuned on a new task. By training with pretrained weights rather than from scratch, we can quickly achieve good results.
-
-The weights have been downloaded and cached (so future calls to the `from_pretrained()` method won't re-download them) in the cache folder, which defaults to *~/.cache/huggingface/transformers*. You can customize your cache folder by setting the `HF_HOME` environment variable.
-
-The identifier used to load the model can be the identifier of any model on the Model Hub, as long as it is compatible with the BERT architecture. The entire list of available BERT checkpoints can be found [here](https://huggingface.co/models?filter=bert).
-
-### Saving methods
-
-Saving a model is as easy as loading one — we use the `save_pretrained()` method, which is analogous to the `from_pretrained()` method:
-
-```py
-model.save_pretrained("directory_on_my_computer")
-```
-
-This saves two files to your disk:
-
-{#if fw === 'pt'}
-```
-ls directory_on_my_computer
-
-config.json pytorch_model.bin
-```
-{:else}
-```
-ls directory_on_my_computer
-
-config.json tf_model.h5
-```
-{/if}
-
-If you take a look at the *config.json* file, you'll recognize the attributes necessary to build the model architecture. This file also contains some metadata, such as where the checkpoint originated and what 🤗 Transformers version you were using when you last saved the checkpoint.
-
-{#if fw === 'pt'}
-The *pytorch_model.bin* file is known as the *state dictionary*; it contains all your model's weights. The two files go hand in hand; the configuration is necessary to know your model's architecture, while the model weights are your model's parameters.
-
-{:else}
-The *tf_model.h5* file is known as the *state dictionary*; it contains all your model's weights. The two files go hand in hand; the configuration is necessary to know your model's architecture, while the model weights are your model's parameters.
-
-{/if}
-
-## Using a Transformer model for inference
-
-Now that you know how to load and save a model, let's try using it to make some predictions. Transformer models can only process numbers — numbers that the tokenizer generates. But before we discuss tokenizers, let's explore what inputs the model accepts.
-
-Tokenizers can take care of casting the inputs to the appropriate framework's tensors, but to help you understand what's going on, we'll take a quick look at what must be done before sending the inputs to the model.
-
-Let's say we have a couple of sequences:
-
-```py
-sequences = ["Hello!", "Cool.", "Nice!"]
-```
-
-The tokenizer converts these to vocabulary indices which are typically called *input IDs*. Each sequence is now a list of numbers! The resulting output is:
-
-```py no-format
-encoded_sequences = [
-    [101, 7592, 999, 102],
-    [101, 4658, 1012, 102],
-    [101, 3835, 999, 102],
-]
-```
-
-This is a list of encoded sequences: a list of lists. Tensors only accept rectangular shapes (think matrices). This "array" is already of rectangular shape, so converting it to a tensor is easy:
-
-{#if fw === 'pt'}
-```py
-import torch
-
-model_inputs = torch.tensor(encoded_sequences)
-```
-{:else}
-```py
-import tensorflow as tf
-
-model_inputs = tf.constant(encoded_sequences)
-```
-{/if}
-
-### Using the tensors as inputs to the model
-
-Making use of the tensors with the model is extremely simple — we just call the model with the inputs:
-
-```py
-output = model(model_inputs)
-```
-
-While the model accepts a lot of different arguments, only the input IDs are necessary. We'll explain what the other arguments do and when they are required later, 
-but first we need to take a closer look at the tokenizers that build the inputs that a Transformer model can understand.
diff --git a/chapters/it/chapter2/4.mdx b/chapters/it/chapter2/4.mdx
deleted file mode 100644
index ccebe04ec..000000000
--- a/chapters/it/chapter2/4.mdx
+++ /dev/null
@@ -1,240 +0,0 @@
-<FrameworkSwitchCourse {fw} />
-
-# Tokenizers
-
-{#if fw === 'pt'}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section4_pt.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section4_pt.ipynb"},
-]} />
-
-{:else}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section4_tf.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section4_tf.ipynb"},
-]} />
-
-{/if}
-
-<Youtube id="VFp38yj8h3A"/>
-
-Tokenizers are one of the core components of the NLP pipeline. They serve one purpose: to translate text into data that can be processed by the model. Models can only process numbers, so tokenizers need to convert our text inputs to numerical data. In this section, we'll explore exactly what happens in the tokenization pipeline. 
-
-In NLP tasks, the data that is generally processed is raw text. Here's an example of such text:
-
-```
-Jim Henson was a puppeteer
-```
-
-However, models can only process numbers, so we need to find a way to convert the raw text to numbers. That's what the tokenizers do, and there are a lot of ways to go about this. The goal is to find the most meaningful representation — that is, the one that makes the most sense to the model — and, if possible, the smallest representation.
-
-Let's take a look at some examples of tokenization algorithms, and try to answer some of the questions you may have about tokenization.
-
-## Word-based
-
-<Youtube id="nhJxYji1aho"/>
-
-The first type of tokenizer that comes to mind is _word-based_. It's generally very easy to set up and use with only a few rules, and it often yields decent results. For example, in the image below, the goal is to split the raw text into words and find a numerical representation for each of them:
-
-<div class="flex justify-center">
-  <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/word_based_tokenization.svg" alt="An example of word-based tokenization."/>
-  <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/word_based_tokenization-dark.svg" alt="An example of word-based tokenization."/>
-</div>
-
-There are different ways to split the text. For example, we could use whitespace to tokenize the text into words by applying Python's `split()` function:
-
-```py
-tokenized_text = "Jim Henson was a puppeteer".split()
-print(tokenized_text)
-```
-
-```python out
-['Jim', 'Henson', 'was', 'a', 'puppeteer']
-```
-
-There are also variations of word tokenizers that have extra rules for punctuation. With this kind of tokenizer, we can end up with some pretty large "vocabularies," where a vocabulary is defined by the total number of independent tokens that we have in our corpus.
-
-Each word gets assigned an ID, starting from 0 and going up to the size of the vocabulary. The model uses these IDs to identify each word.
-
-If we want to completely cover a language with a word-based tokenizer, we'll need to have an identifier for each word in the language, which will generate a huge amount of tokens. For example, there are over 500,000 words in the English language, so to build a map from each word to an input ID we'd need to keep track of that many IDs. Furthermore, words like "dog" are represented differently from words like "dogs", and the model will initially have no way of knowing that "dog" and "dogs" are similar: it will identify the two words as unrelated. The same applies to other similar words, like "run" and "running", which the model will not see as being similar initially.
-
-Finally, we need a custom token to represent words that are not in our vocabulary. This is known as the "unknown" token, often represented as "[UNK]" or "&lt;unk&gt;". It's generally a bad sign if you see that the tokenizer is producing a lot of these tokens, as it wasn't able to retrieve a sensible representation of a word and you're losing information along the way. The goal when crafting the vocabulary is to do it in such a way that the tokenizer tokenizes as few words as possible into the unknown token.
-
-One way to reduce the amount of unknown tokens is to go one level deeper, using a _character-based_ tokenizer.
-
-## Character-based
-
-<Youtube id="ssLq_EK2jLE"/>
-
-Character-based tokenizers split the text into characters, rather than words. This has two primary benefits:
-
-- The vocabulary is much smaller.
-- There are far fewer out-of-vocabulary (unknown) tokens, since every word can be built from characters.
-
-But here too some questions arise concerning spaces and punctuation:
-
-<div class="flex justify-center">
-  <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/character_based_tokenization.svg" alt="An example of character-based tokenization."/>
-  <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/character_based_tokenization-dark.svg" alt="An example of character-based tokenization."/>
-</div>
-
-This approach isn't perfect either. Since the representation is now based on characters rather than words, one could argue that, intuitively, it's less meaningful: each character doesn't mean a lot on its own, whereas that is the case with words. However, this again differs according to the language; in Chinese, for example, each character carries more information than a character in a Latin language.
-
-Another thing to consider is that we'll end up with a very large amount of tokens to be processed by our model: whereas a word would only be a single token with a word-based tokenizer, it can easily turn into 10 or more tokens when converted into characters.
-
-To get the best of both worlds, we can use a third technique that combines the two approaches: *subword tokenization*.
-
-## Subword tokenization
-
-<Youtube id="zHvTiHr506c"/>
-
-Subword tokenization algorithms rely on the principle that frequently used words should not be split into smaller subwords, but rare words should be decomposed into meaningful subwords.
-
-For instance, "annoyingly" might be considered a rare word and could be decomposed into "annoying" and "ly". These are both likely to appear more frequently as standalone subwords, while at the same time the meaning of "annoyingly" is kept by the composite meaning of "annoying" and "ly".
-
-Here is an example showing how a subword tokenization algorithm would tokenize the sequence "Let's do tokenization!":
-
-<div class="flex justify-center">
-  <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/bpe_subword.svg" alt="A subword tokenization algorithm."/>
-  <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter2/bpe_subword-dark.svg" alt="A subword tokenization algorithm."/>
-</div>
-
-These subwords end up providing a lot of semantic meaning: for instance, in the example above "tokenization" was split into "token" and "ization", two tokens that have a semantic meaning while being space-efficient (only two tokens are needed to represent a long word). This allows us to have relatively good coverage with small vocabularies, and close to no unknown tokens.
-
-This approach is especially useful in agglutinative languages such as Turkish, where you can form (almost) arbitrarily long complex words by stringing together subwords.
-
-### And more!
-
-Unsurprisingly, there are many more techniques out there. To name a few:
-
-- Byte-level BPE, as used in GPT-2
-- WordPiece, as used in BERT
-- SentencePiece or Unigram, as used in several multilingual models
-
-You should now have sufficient knowledge of how tokenizers work to get started with the API.
-
-## Loading and saving
-
-Loading and saving tokenizers is as simple as it is with models. Actually, it's based on the same two methods: `from_pretrained()` and `save_pretrained()`. These methods will load or save the algorithm used by the tokenizer (a bit like the *architecture* of the model) as well as its vocabulary (a bit like the *weights* of the model).
-
-Loading the BERT tokenizer trained with the same checkpoint as BERT is done the same way as loading the model, except we use the `BertTokenizer` class:
-
-```py
-from transformers import BertTokenizer
-
-tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
-```
-
-{#if fw === 'pt'}
-Similar to `AutoModel`, the `AutoTokenizer` class will grab the proper tokenizer class in the library based on the checkpoint name, and can be used directly with any checkpoint:
-
-{:else}
-Similar to `TFAutoModel`, the `AutoTokenizer` class will grab the proper tokenizer class in the library based on the checkpoint name, and can be used directly with any checkpoint:
-
-{/if}
-
-```py
-from transformers import AutoTokenizer
-
-tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
-```
-
-We can now use the tokenizer as shown in the previous section:
-
-```python
-tokenizer("Using a Transformer network is simple")
-```
-
-```python out
-{'input_ids': [101, 7993, 170, 11303, 1200, 2443, 1110, 3014, 102],
- 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0],
- 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}
-```
-
-Saving a tokenizer is identical to saving a model:
-
-```py
-tokenizer.save_pretrained("directory_on_my_computer")
-```
-
-We'll talk more about `token_type_ids` in [Chapter 3](/course/chapter3), and we'll explain the `attention_mask` key a little later. First, let's see how the `input_ids` are generated. To do this, we'll need to look at the intermediate methods of the tokenizer.
-
-## Encoding
-
-<Youtube id="Yffk5aydLzg"/>
-
-Translating text to numbers is known as _encoding_. Encoding is done in a two-step process: the tokenization, followed by the conversion to input IDs.
-
-As we've seen, the first step is to split the text into words (or parts of words, punctuation symbols, etc.), usually called *tokens*. There are multiple rules that can govern that process, which is why we need to instantiate the tokenizer using the name of the model, to make sure we use the same rules that were used when the model was pretrained.
-
-The second step is to convert those tokens into numbers, so we can build a tensor out of them and feed them to the model. To do this, the tokenizer has a *vocabulary*, which is the part we download when we instantiate it with the `from_pretrained()` method. Again, we need to use the same vocabulary used when the model was pretrained.
-
-To get a better understanding of the two steps, we'll explore them separately. Note that we will use some methods that perform parts of the tokenization pipeline separately to show you the intermediate results of those steps, but in practice, you should call the tokenizer directly on your inputs (as shown in section 2).
-
-### Tokenization
-
-The tokenization process is done by the `tokenize()` method of the tokenizer:
-
-```py
-from transformers import AutoTokenizer
-
-tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
-
-sequence = "Using a Transformer network is simple"
-tokens = tokenizer.tokenize(sequence)
-
-print(tokens)
-```
-
-The output of this method is a list of strings, or tokens:
-
-```python out
-['Using', 'a', 'transform', '##er', 'network', 'is', 'simple']
-```
-
-This tokenizer is a subword tokenizer: it splits the words until it obtains tokens that can be represented by its vocabulary. That's the case here with `transformer`, which is split into two tokens: `transform` and `##er`.
-
-### From tokens to input IDs
-
-The conversion to input IDs is handled by the `convert_tokens_to_ids()` tokenizer method:
-
-```py
-ids = tokenizer.convert_tokens_to_ids(tokens)
-
-print(ids)
-```
-
-```python out
-[7993, 170, 11303, 1200, 2443, 1110, 3014]
-```
-
-These outputs, once converted to the appropriate framework tensor, can then be used as inputs to a model as seen earlier in this chapter.
-
-<Tip>
-
-✏️ **Try it out!** Replicate the two last steps (tokenization and conversion to input IDs) on the input sentences we used in section 2 ("I've been waiting for a HuggingFace course my whole life." and "I hate this so much!"). Check that you get the same input IDs we got earlier!
-
-</Tip>
-
-## Decoding
-
-*Decoding* is going the other way around: from vocabulary indices, we want to get a string. This can be done with the `decode()` method as follows:
-
-```py
-decoded_string = tokenizer.decode([7993, 170, 11303, 1200, 2443, 1110, 3014])
-print(decoded_string)
-```
-
-```python out
-'Using a Transformer network is simple'
-```
-
-Note that the `decode` method not only converts the indices back to tokens, but also groups together the tokens that were part of the same words to produce a readable sentence. This behavior will be extremely useful when we use models that predict new text (either text generated from a prompt, or for sequence-to-sequence problems like translation or summarization).
-
-By now you should understand the atomic operations a tokenizer can handle: tokenization, conversion to IDs, and converting IDs back to a string. However, we've just scratched the tip of the iceberg. In the following section, we'll take our approach to its limits and take a look at how to overcome them.
diff --git a/chapters/it/chapter2/5.mdx b/chapters/it/chapter2/5.mdx
deleted file mode 100644
index 5a692aa19..000000000
--- a/chapters/it/chapter2/5.mdx
+++ /dev/null
@@ -1,338 +0,0 @@
-<FrameworkSwitchCourse {fw} />
-
-# Handling multiple sequences
-
-{#if fw === 'pt'}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section5_pt.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section5_pt.ipynb"},
-]} />
-
-{:else}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section5_tf.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section5_tf.ipynb"},
-]} />
-
-{/if}
-
-{#if fw === 'pt'}
-<Youtube id="M6adb1j2jPI"/>
-{:else}
-<Youtube id="ROxrFOEbsQE"/>
-{/if}
-
-In the previous section, we explored the simplest of use cases: doing inference on a single sequence of a small length. However, some questions emerge already:
-
-- How do we handle multiple sequences?
-- How do we handle multiple sequences *of different lengths*?
-- Are vocabulary indices the only inputs that allow a model to work well?
-- Is there such a thing as too long a sequence?
-
-Let's see what kinds of problems these questions pose, and how we can solve them using the 🤗 Transformers API.
-
-## Models expect a batch of inputs
-
-In the previous exercise you saw how sequences get translated into lists of numbers. Let's convert this list of numbers to a tensor and send it to the model:
-
-{#if fw === 'pt'}
-```py
-import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
-
-sequence = "I've been waiting for a HuggingFace course my whole life."
-
-tokens = tokenizer.tokenize(sequence)
-ids = tokenizer.convert_tokens_to_ids(tokens)
-input_ids = torch.tensor(ids)
-# This line will fail.
-model(input_ids)
-```
-
-```python out
-IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
-```
-{:else}
-```py
-import tensorflow as tf
-from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
-
-sequence = "I've been waiting for a HuggingFace course my whole life."
-
-tokens = tokenizer.tokenize(sequence)
-ids = tokenizer.convert_tokens_to_ids(tokens)
-input_ids = tf.constant(ids)
-# This line will fail.
-model(input_ids)
-```
-
-```py out
-InvalidArgumentError: Input to reshape is a tensor with 14 values, but the requested shape has 196 [Op:Reshape]
-```
-{/if}
-
-Oh no! Why did this fail? We followed the steps from the pipeline in section 2.
-
-The problem is that we sent a single sequence to the model, whereas 🤗 Transformers models expect multiple sentences by default. Here we tried to do everything the tokenizer did behind the scenes when we applied it to a `sequence`, but if you look closely, you'll see that it didn't just convert the list of input IDs into a tensor, it added a dimension on top of it:
-
-{#if fw === 'pt'}
-```py
-tokenized_inputs = tokenizer(sequence, return_tensors="pt")
-print(tokenized_inputs["input_ids"])
-```
-
-```python out
-tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
-          2607,  2026,  2878,  2166,  1012,   102]])
-```
-{:else}
-```py
-tokenized_inputs = tokenizer(sequence, return_tensors="tf")
-print(tokenized_inputs["input_ids"])
-```
-
-```py out
-<tf.Tensor: shape=(1, 16), dtype=int32, numpy=
-array([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662,
-        12172,  2607,  2026,  2878,  2166,  1012,   102]], dtype=int32)>
-```
-{/if}
-
-Let's try again and add a new dimension:
-
-{#if fw === 'pt'}
-```py
-import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
-
-sequence = "I've been waiting for a HuggingFace course my whole life."
-
-tokens = tokenizer.tokenize(sequence)
-ids = tokenizer.convert_tokens_to_ids(tokens)
-
-input_ids = torch.tensor([ids])
-print("Input IDs:", input_ids)
-
-output = model(input_ids)
-print("Logits:", output.logits)
-```
-{:else}
-```py
-import tensorflow as tf
-from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
-
-sequence = "I've been waiting for a HuggingFace course my whole life."
-
-tokens = tokenizer.tokenize(sequence)
-ids = tokenizer.convert_tokens_to_ids(tokens)
-
-input_ids = tf.constant([ids])
-print("Input IDs:", input_ids)
-
-output = model(input_ids)
-print("Logits:", output.logits)
-```
-{/if}
-
-We print the input IDs as well as the resulting logits — here's the output:
-
-{#if fw === 'pt'}
-```python out
-Input IDs: [[ 1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,  2607, 2026,  2878,  2166,  1012]]
-Logits: [[-2.7276,  2.8789]]
-```
-{:else}
-```py out
-Input IDs: tf.Tensor(
-[[ 1045  1005  2310  2042  3403  2005  1037 17662 12172  2607  2026  2878
-   2166  1012]], shape=(1, 14), dtype=int32)
-Logits: tf.Tensor([[-2.7276208  2.8789377]], shape=(1, 2), dtype=float32)
-```
-{/if}
-
-*Batching* is the act of sending multiple sentences through the model, all at once. If you only have one sentence, you can just build a batch with a single sequence: 
-
-```
-batched_ids = [ids, ids]
-```
-
-This is a batch of two identical sequences!
-
-<Tip>
-
-✏️ **Try it out!** Convert this `batched_ids` list into a tensor and pass it through your model. Check that you obtain the same logits as before (but twice)!
-
-</Tip>
-
-Batching allows the model to work when you feed it multiple sentences. Using multiple sequences is just as simple as building a batch with a single sequence. There's a second issue, though. When you're trying to batch together two (or more) sentences, they might be of different lengths. If you've ever worked with tensors before, you know that they need to be of rectangular shape, so you won't be able to convert the list of input IDs into a tensor directly. To work around this problem, we usually *pad* the inputs.
-
-## Padding the inputs
-
-The following list of lists cannot be converted to a tensor:
-
-```py no-format
-batched_ids = [
-    [200, 200, 200],
-    [200, 200]
-]
-```
-
-In order to work around this, we'll use *padding* to make our tensors have a rectangular shape. Padding makes sure all our sentences have the same length by adding a special word called the *padding token* to the sentences with fewer values. For example, if you have 10 sentences with 10 words and 1 sentence with 20 words, padding will ensure all the sentences have 20 words. In our example, the resulting tensor looks like this:
-
-```py no-format
-padding_id = 100
-
-batched_ids = [
-    [200, 200, 200],
-    [200, 200, padding_id],
-]
-```
-
-The padding token ID can be found in `tokenizer.pad_token_id`. Let's use it and send our two sentences through the model individually and batched together:
-
-{#if fw === 'pt'}
-```py no-format
-model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
-
-sequence1_ids = [[200, 200, 200]]
-sequence2_ids = [[200, 200]]
-batched_ids = [
-    [200, 200, 200],
-    [200, 200, tokenizer.pad_token_id],
-]
-
-print(model(torch.tensor(sequence1_ids)).logits)
-print(model(torch.tensor(sequence2_ids)).logits)
-print(model(torch.tensor(batched_ids)).logits)
-```
-
-```python out
-tensor([[ 1.5694, -1.3895]], grad_fn=<AddmmBackward>)
-tensor([[ 0.5803, -0.4125]], grad_fn=<AddmmBackward>)
-tensor([[ 1.5694, -1.3895],
-        [ 1.3373, -1.2163]], grad_fn=<AddmmBackward>)
-```
-{:else}
-```py no-format
-model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
-
-sequence1_ids = [[200, 200, 200]]
-sequence2_ids = [[200, 200]]
-batched_ids = [
-    [200, 200, 200],
-    [200, 200, tokenizer.pad_token_id],
-]
-
-print(model(tf.constant(sequence1_ids)).logits)
-print(model(tf.constant(sequence2_ids)).logits)
-print(model(tf.constant(batched_ids)).logits)
-```
-
-```py out
-tf.Tensor([[ 1.5693678 -1.3894581]], shape=(1, 2), dtype=float32)
-tf.Tensor([[ 0.5803005  -0.41252428]], shape=(1, 2), dtype=float32)
-tf.Tensor(
-[[ 1.5693681 -1.3894582]
- [ 1.3373486 -1.2163193]], shape=(2, 2), dtype=float32)
-```
-{/if}
-
-There's something wrong with the logits in our batched predictions: the second row should be the same as the logits for the second sentence, but we've got completely different values!
-
-This is because the key feature of Transformer models is attention layers that *contextualize* each token. These will take into account the padding tokens since they attend to all of the tokens of a sequence. To get the same result when passing individual sentences of different lengths through the model or when passing a batch with the same sentences and padding applied, we need to tell those attention layers to ignore the padding tokens. This is done by using an attention mask.
-
-## Attention masks
-
-*Attention masks* are tensors with the exact same shape as the input IDs tensor, filled with 0s and 1s: 1s indicate the corresponding tokens should be attended to, and 0s indicate the corresponding tokens should not be attended to (i.e., they should be ignored by the attention layers of the model).
-
-Let's complete the previous example with an attention mask:
-
-{#if fw === 'pt'}
-```py no-format
-batched_ids = [
-    [200, 200, 200],
-    [200, 200, tokenizer.pad_token_id],
-]
-
-attention_mask = [
-    [1, 1, 1],
-    [1, 1, 0],
-]
-
-outputs = model(torch.tensor(batched_ids), attention_mask=torch.tensor(attention_mask))
-print(outputs.logits)
-```
-
-```python out
-tensor([[ 1.5694, -1.3895],
-        [ 0.5803, -0.4125]], grad_fn=<AddmmBackward>)
-```
-{:else}
-```py no-format
-batched_ids = [
-    [200, 200, 200],
-    [200, 200, tokenizer.pad_token_id],
-]
-
-attention_mask = [
-    [1, 1, 1],
-    [1, 1, 0],
-]
-
-outputs = model(tf.constant(batched_ids), attention_mask=tf.constant(attention_mask))
-print(outputs.logits)
-```
-
-```py out
-tf.Tensor(
-[[ 1.5693681  -1.3894582 ]
- [ 0.5803021  -0.41252586]], shape=(2, 2), dtype=float32)
-```
-{/if}
-
-Now we get the same logits for the second sentence in the batch.
-
-Notice how the last value of the second sequence is a padding ID, which is a 0 value in the attention mask.
-
-<Tip>
-
-✏️ **Try it out!** Apply the tokenization manually on the two sentences used in section 2 ("I've been waiting for a HuggingFace course my whole life." and "I hate this so much!"). Pass them through the model and check that you get the same logits as in section 2. Now batch them together using the padding token, then create the proper attention mask. Check that you obtain the same results when going through the model!
-
-</Tip>
-
-## Longer sequences
-
-With Transformer models, there is a limit to the lengths of the sequences we can pass the models. Most models handle sequences of up to 512 or 1024 tokens, and will crash when asked to process longer sequences. There are two solutions to this problem:
-
-- Use a model with a longer supported sequence length.
-- Truncate your sequences.
-
-Models have different supported sequence lengths, and some specialize in handling very long sequences. [Longformer](https://huggingface.co/transformers/model_doc/longformer.html) is one example, and another is [LED](https://huggingface.co/transformers/model_doc/led.html). If you're working on a task that requires very long sequences, we recommend you take a look at those models.
-
-Otherwise, we recommend you truncate your sequences by specifying the `max_sequence_length` parameter:
-
-```py
-sequence = sequence[:max_sequence_length]
-```
diff --git a/chapters/it/chapter2/6.mdx b/chapters/it/chapter2/6.mdx
deleted file mode 100644
index 974123515..000000000
--- a/chapters/it/chapter2/6.mdx
+++ /dev/null
@@ -1,164 +0,0 @@
-<FrameworkSwitchCourse {fw} />
-
-# Putting it all together
-
-{#if fw === 'pt'}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section6_pt.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section6_pt.ipynb"},
-]} />
-
-{:else}
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter2/section6_tf.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter2/section6_tf.ipynb"},
-]} />
-
-{/if}
-
-In the last few sections, we've been trying our best to do most of the work by hand. We've explored how tokenizers work and looked at tokenization, conversion to input IDs, padding, truncation, and attention masks.
-
-However, as we saw in section 2, the 🤗 Transformers API can handle all of this for us with a high-level function that we'll dive into here. When you call your `tokenizer` directly on the sentence, you get back inputs that are ready to pass through your model:
-
-```py
-from transformers import AutoTokenizer
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-
-sequence = "I've been waiting for a HuggingFace course my whole life."
-
-model_inputs = tokenizer(sequence)
-```
-
-Here, the `model_inputs` variable contains everything that's necessary for a model to operate well. For DistilBERT, that includes the input IDs as well as the attention mask. Other models that accept additional inputs will also have those output by the `tokenizer` object.
-
-As we'll see in some examples below, this method is very powerful. First, it can tokenize a single sequence:
-
-```py
-sequence = "I've been waiting for a HuggingFace course my whole life."
-
-model_inputs = tokenizer(sequence)
-```
-
-It also handles multiple sequences at a time, with no change in the API:
-
-```py
-sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
-
-model_inputs = tokenizer(sequences)
-```
-
-It can pad according to several objectives:
-
-```py
-# Will pad the sequences up to the maximum sequence length
-model_inputs = tokenizer(sequences, padding="longest")
-
-# Will pad the sequences up to the model max length
-# (512 for BERT or DistilBERT)
-model_inputs = tokenizer(sequences, padding="max_length")
-
-# Will pad the sequences up to the specified max length
-model_inputs = tokenizer(sequences, padding="max_length", max_length=8)
-```
-
-It can also truncate sequences:
-
-```py
-sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
-
-# Will truncate the sequences that are longer than the model max length
-# (512 for BERT or DistilBERT)
-model_inputs = tokenizer(sequences, truncation=True)
-
-# Will truncate the sequences that are longer than the specified max length
-model_inputs = tokenizer(sequences, max_length=8, truncation=True)
-```
-
-The `tokenizer` object can handle the conversion to specific framework tensors, which can then be directly sent to the model. For example, in the following code sample we are prompting the tokenizer to return tensors from the different frameworks — `"pt"` returns PyTorch tensors, `"tf"` returns TensorFlow tensors, and `"np"` returns NumPy arrays:
-
-```py
-sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
-
-# Returns PyTorch tensors
-model_inputs = tokenizer(sequences, padding=True, return_tensors="pt")
-
-# Returns TensorFlow tensors
-model_inputs = tokenizer(sequences, padding=True, return_tensors="tf")
-
-# Returns NumPy arrays
-model_inputs = tokenizer(sequences, padding=True, return_tensors="np")
-```
-
-## Special tokens
-
-If we take a look at the input IDs returned by the tokenizer, we will see they are a tiny bit different from what we had earlier:
-
-```py
-sequence = "I've been waiting for a HuggingFace course my whole life."
-
-model_inputs = tokenizer(sequence)
-print(model_inputs["input_ids"])
-
-tokens = tokenizer.tokenize(sequence)
-ids = tokenizer.convert_tokens_to_ids(tokens)
-print(ids)
-```
-
-```python out
-[101, 1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012, 102]
-[1045, 1005, 2310, 2042, 3403, 2005, 1037, 17662, 12172, 2607, 2026, 2878, 2166, 1012]
-```
-
-One token ID was added at the beginning, and one at the end. Let's decode the two sequences of IDs above to see what this is about:
-
-```py
-print(tokenizer.decode(model_inputs["input_ids"]))
-print(tokenizer.decode(ids))
-```
-
-```python out
-"[CLS] i've been waiting for a huggingface course my whole life. [SEP]"
-"i've been waiting for a huggingface course my whole life."
-```
-
-The tokenizer added the special word `[CLS]` at the beginning and the special word `[SEP]` at the end. This is because the model was pretrained with those, so to get the same results for inference we need to add them as well. Note that some models don't add special words, or add different ones; models may also add these special words only at the beginning, or only at the end. In any case, the tokenizer knows which ones are expected and will deal with this for you.
-
-## Wrapping up: From tokenizer to model
-
-Now that we've seen all the individual steps the `tokenizer` object uses when applied on texts, let's see one final time how it can handle multiple sequences (padding!), very long sequences (truncation!), and multiple types of tensors with its main API:
-
-{#if fw === 'pt'}
-```py
-import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
-sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
-
-tokens = tokenizer(sequences, padding=True, truncation=True, return_tensors="pt")
-output = model(**tokens)
-```
-{:else}
-```py
-import tensorflow as tf
-from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
-
-checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
-tokenizer = AutoTokenizer.from_pretrained(checkpoint)
-model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
-sequences = ["I've been waiting for a HuggingFace course my whole life.", "So have I!"]
-
-tokens = tokenizer(sequences, padding=True, truncation=True, return_tensors="tf")
-output = model(**tokens)
-```
-{/if}
diff --git a/chapters/it/chapter2/7.mdx b/chapters/it/chapter2/7.mdx
deleted file mode 100644
index 122728d08..000000000
--- a/chapters/it/chapter2/7.mdx
+++ /dev/null
@@ -1,13 +0,0 @@
-# Basic usage completed!
-
-Great job following the course up to here! To recap, in this chapter you:
-
-- Learned the basic building blocks of a Transformer model.
-- Learned what makes up a tokenization pipeline.
-- Saw how to use a Transformer model in practice.
-- Learned how to leverage a tokenizer to convert text to tensors that are understandable by the model.
-- Set up a tokenizer and a model together to get from text to predictions.
-- Learned the limitations of input IDs, and learned about attention masks.
-- Played around with versatile and configurable tokenizer methods.
-
-From now on, you should be able to freely navigate the 🤗 Transformers docs: the vocabulary will sound familiar, and you've already seen the methods that you'll use the majority of the time.
diff --git a/chapters/it/chapter2/8.mdx b/chapters/it/chapter2/8.mdx
deleted file mode 100644
index 43f0a8c9c..000000000
--- a/chapters/it/chapter2/8.mdx
+++ /dev/null
@@ -1,305 +0,0 @@
-<FrameworkSwitchCourse {fw} />
-
-<!-- DISABLE-FRONTMATTER-SECTIONS -->
-
-# End-of-chapter quiz
-
-### 1. What is the order of the language modeling pipeline?
-
-<Question
-	choices={[
-		{
-			text: "First, the model, which handles text and returns raw predictions. The tokenizer then makes sense of these predictions and converts them back to text when needed.",
-			explain: "The model cannot understand text! The tokenizer must first tokenize the text and convert it to IDs so that it is understandable by the model."
-		},
-		{
-			text: "First, the tokenizer, which handles text and returns IDs. The model handles these IDs and outputs a prediction, which can be some text.",
-			explain: "The model's prediction cannot be text straight away. The tokenizer has to be used in order to convert the prediction back to text!"
-		},
-		{
-			text: "The tokenizer handles text and returns IDs. The model handles these IDs and outputs a prediction. The tokenizer can then be used once again to convert these predictions back to some text.",
-			explain: "Correct! The tokenizer can be used for both tokenizing and de-tokenizing.",
-            correct: true
-		}
-	]}
-/>
-
-### 2. How many dimensions does the tensor output by the base Transformer model have, and what are they?
-
-<Question
-	choices={[
-		{
-			text: "2: The sequence length and the batch size",
-			explain: "False! The tensor output by the model has a third dimension: hidden size."
-		},
-		{
-			text: "2: The sequence length and the hidden size",
-			explain: "False! All Transformer models handle batches, even with a single sequence; that would be a batch size of 1!"
-		},
-		{
-			text: "3: The sequence length, the batch size, and the hidden size",
-			explain: "Correct!",
-            correct: true
-		}
-	]}
-/>
-
-### 3. Which of the following is an example of subword tokenization?
-
-<Question
-	choices={[
-		{
-			text: "WordPiece",
-			explain: "Yes, that's one example of subword tokenization!",
-            correct: true
-		},
-		{
-			text: "Character-based tokenization",
-			explain: "Character-based tokenization is not a type of subword tokenization."
-		},
-		{
-			text: "Splitting on whitespace and punctuation",
-			explain: "That's a word-based tokenization scheme!"
-		},
-		{
-			text: "BPE",
-			explain: "Yes, that's one example of subword tokenization!",
-            correct: true
-        },
-		{
-			text: "Unigram",
-			explain: "Yes, that's one example of subword tokenization!",
-            correct: true
-        },
-		{
-			text: "None of the above",
-			explain: "Incorrect!"
-        }
-	]}
-/>
-
-### 4. What is a model head?
-
-<Question
-	choices={[
-		{
-			text: "A component of the base Transformer network that redirects tensors to their correct layers",
-			explain: "Incorrect! There's no such component."
-		},
-		{
-			text: "Also known as the self-attention mechanism, it adapts the representation of a token according to the other tokens of the sequence",
-			explain: "Incorrect! The self-attention layer does contain attention \"heads,\" but these are not adaptation heads."
-		},
-		{
-			text: "An additional component, usually made up of one or a few layers, to convert the transformer predictions to a task-specific output",
-			explain: "That's right. Adaptation heads, also known simply as heads, come up in different forms: language modeling heads, question answering heads, sequence classification heads... ",
-			correct: true
-		} 
-	]}
-/>
-
-{#if fw === 'pt'}
-### 5. What is an AutoModel?
-
-<Question
-	choices={[
-		{
-			text: "A model that automatically trains on your data",
-			explain: "Incorrect. Are you mistaking this with our <a href='https://huggingface.co/autonlp'>AutoNLP</a> product?"
-		},
-		{
-			text: "An object that returns the correct architecture based on the checkpoint",
-			explain: "Exactly: the <code>AutoModel</code> only needs to know the checkpoint from which to initialize to return the correct architecture.",
-			correct: true
-		},
-		{
-			text: "A model that automatically detects the language used for its inputs to load the correct weights",
-			explain: "Incorrect; while some checkpoints and models are capable of handling multiple languages, there are no built-in tools for automatic checkpoint selection according to language. You should head over to the <a href='https://huggingface.co/models'>Model Hub</a> to find the best checkpoint for your task!"
-		} 
-	]}
-/>
-
-{:else}
-### 5. What is an TFAutoModel?
-
-<Question
-	choices={[
-		{
-			text: "A model that automatically trains on your data",
-			explain: "Incorrect. Are you mistaking this with our <a href='https://huggingface.co/autonlp'>AutoNLP</a> product?"
-		},
-		{
-			text: "An object that returns the correct architecture based on the checkpoint",
-			explain: "Exactly: the <code>TFAutoModel</code> only needs to know the checkpoint from which to initialize to return the correct architecture.",
-			correct: true
-		},
-		{
-			text: "A model that automatically detects the language used for its inputs to load the correct weights",
-			explain: "Incorrect; while some checkpoints and models are capable of handling multiple languages, there are no built-in tools for automatic checkpoint selection according to language. You should head over to the <a href='https://huggingface.co/models'>Model Hub</a> to find the best checkpoint for your task!"
-		} 
-	]}
-/>
-
-{/if}
-
-### 6. What are the techniques to be aware of when batching sequences of different lengths together?
-
-<Question
-	choices={[
-		{
-			text: "Truncating",
-			explain: "Yes, truncation is a correct way of evening out sequences so that they fit in a rectangular shape. Is it the only one, though?",
-			correct: true
-		},
-		{
-			text: "Returning tensors",
-			explain: "While the other techniques allow you to return rectangular tensors, returning tensors isn't helpful when batching sequences together."
-		},
-		{
-			text: "Padding",
-			explain: "Yes, padding is a correct way of evening out sequences so that they fit in a rectangular shape. Is it the only one, though?",
-			correct: true
-		}, 
-		{
-			text: "Attention masking",
-			explain: "Absolutely! Attention masks are of prime importance when handling sequences of different lengths. That's not the only technique to be aware of, however.",
-			correct: true
-		} 
-	]}
-/>
-
-### 7. What is the point of applying a SoftMax function to the logits output by a sequence classification model?
-
-<Question
-	choices={[
-		{
-			text: "It softens the logits so that they're more reliable.",
-			explain: "No, the SoftMax function does not affect the reliability of results."
-		},
-		{
-			text: "It applies a lower and upper bound so that they're understandable.",
-			explain: "Correct! The resulting values are bound between 0 and 1. That's not the only reason we use a SoftMax function, though.",
-            correct: true
-		},
-		{
-			text: "The total sum of the output is then 1, resulting in a possible probabilistic interpretation.",
-			explain: "Correct! That's not the only reason we use a SoftMax function, though.",
-            correct: true
-		}
-	]}
-/>
-
-### 8. What method is most of the tokenizer API centered around?
-
-<Question
-	choices={[
-		{
-			text: "<code>encode</code>, as it can encode text into IDs and IDs into predictions",
-			explain: "Wrong! While the <code>encode</code> method does exist on tokenizers, it does not exist on models."
-		},
-		{
-			text: "Calling the tokenizer object directly.",
-			explain: "Exactly! The <code>__call__</code> method of the tokenizer is a very powerful method which can handle pretty much anything. It is also the method used to retrieve predictions from a model.",
-			correct: true
-		},
-		{
-			text: "<code>pad</code>",
-			explain: "Wrong! Padding is very useful, but it's just one part of the tokenizer API."
-		},
-		{
-			text: "<code>tokenize</code>",
-			explain: "The <code>tokenize</code> method is arguably one of the most useful methods, but it isn't the core of the tokenizer API."
-		}
-	]}
-/>
-
-### 9. What does the `result` variable contain in this code sample?
-
-```py
-from transformers import AutoTokenizer
-
-tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
-result = tokenizer.tokenize("Hello!")
-```
-
-<Question
-	choices={[
-		{
-			text: "A list of strings, each string being a token",
-			explain: "Absolutely! Convert this to IDs, and send them to a model!",
-            correct: true
-		},
-		{
-			text: "A list of IDs",
-			explain: "Incorrect; that's what the <code>__call__</code> or <code>convert_tokens_to_ids</code> method is for!"
-		},
-		{
-			text: "A string containing all of the tokens",
-			explain: "This would be suboptimal, as the goal is to split the string into multiple tokens."
-		}
-	]}
-/>
-
-{#if fw === 'pt'}
-### 10. Is there something wrong with the following code?
-
-```py
-from transformers import AutoTokenizer, AutoModel
-
-tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
-model = AutoModel.from_pretrained("gpt2")
-
-encoded = tokenizer("Hey!", return_tensors="pt")
-result = model(**encoded)
-```
-
-<Question
-	choices={[
-		{
-			text: "No, it seems correct.",
-			explain: "Unfortunately, coupling a model with a tokenizer that was trained with a different checkpoint is rarely a good idea. The model was not trained to make sense out of this tokenizer's output, so the model output (if it can even run!) will not make any sense."
-		},
-		{
-			text: "The tokenizer and model should always be from the same checkpoint.",
-			explain: "Right!",
-            correct: true
-		},
-		{
-			text: "It's good practice to pad and truncate with the tokenizer as every input is a batch.",
-			explain: "It's true that every model input needs to be a batch. However, truncating or padding this sequence wouldn't necessarily make sense as there is only one of it, and those are techniques to batch together a list of sentences."
-		}
-	]}
-/>
-
-{:else}
-### 10. Is there something wrong with the following code?
-
-```py
-from transformers import AutoTokenizer, TFAutoModel
-
-tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
-model = TFAutoModel.from_pretrained("gpt2")
-
-encoded = tokenizer("Hey!", return_tensors="pt")
-result = model(**encoded)
-```
-
-<Question
-	choices={[
-		{
-			text: "No, it seems correct.",
-			explain: "Unfortunately, coupling a model with a tokenizer that was trained with a different checkpoint is rarely a good idea. The model was not trained to make sense out of this tokenizer's output, so the model output (if it can even run!) will not make any sense."
-		},
-		{
-			text: "The tokenizer and model should always be from the same checkpoint.",
-			explain: "Right!",
-            correct: true
-		},
-		{
-			text: "It's good practice to pad and truncate with the tokenizer as every input is a batch.",
-			explain: "It's true that every model input needs to be a batch. However, truncating or padding this sequence wouldn't necessarily make sense as there is only one of it, and those are techniques to batch together a list of sentences."
-		}
-	]}
-/>
-
-{/if}
diff --git a/chapters/it/chapter2/test b/chapters/it/chapter2/test
deleted file mode 100644
index 8b1378917..000000000
--- a/chapters/it/chapter2/test
+++ /dev/null
@@ -1 +0,0 @@
-

From 18daed7ab6c12a3223b43d4fdb0f2629323adf64 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 26 Apr 2022 21:28:18 +0100
Subject: [PATCH 055/127] Update build_documentation.yml

---
 .github/workflows/build_documentation.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build_documentation.yml b/.github/workflows/build_documentation.yml
index 4e924e0d7..4c0ab0b50 100644
--- a/.github/workflows/build_documentation.yml
+++ b/.github/workflows/build_documentation.yml
@@ -14,5 +14,6 @@ jobs:
       package: course
       path_to_docs: course/chapters/en
       additional_args: --not_python_module
+      languages: ar bn de en es fa fr gj he hi it ja ko pt ru th tr zh-CN
     secrets:
-      token: ${{ secrets.HUGGINGFACE_PUSH }}
\ No newline at end of file
+      token: ${{ secrets.HUGGINGFACE_PUSH }}

From 14836a65e361c5cdad9f4a0863321b121c0caebb Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 26 Apr 2022 21:28:38 +0100
Subject: [PATCH 056/127] Update build_pr_documentation.yml

---
 .github/workflows/build_pr_documentation.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml
index b85046866..a6d661962 100644
--- a/.github/workflows/build_pr_documentation.yml
+++ b/.github/workflows/build_pr_documentation.yml
@@ -15,4 +15,5 @@ jobs:
       pr_number: ${{ github.event.number }}
       package: course
       path_to_docs: course/chapters/en
-      additional_args: --not_python_module
\ No newline at end of file
+      additional_args: --not_python_module
+      languages: ar bn de en es fa fr gj he hi it ja ko pt ru th tr zh-CN

From e5b09af68a9aaf333504102c552ce1221f055338 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 26 Apr 2022 21:30:24 +0100
Subject: [PATCH 057/127] Adds chapter1/1 to ToC

---
 chapters/it/_toctree.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/chapters/it/_toctree.yml b/chapters/it/_toctree.yml
index a076b2f04..a46bf9993 100644
--- a/chapters/it/_toctree.yml
+++ b/chapters/it/_toctree.yml
@@ -2,3 +2,8 @@
   sections:
   - local: chapter0/1
     title: Introduzione
+    
+- title: 1. Modelli Transformer
+  sections:
+  - local: chapter1/1
+    title: Introduzione

From 942b3a7035f86b3b5d8e9df3e6190a6907618ef8 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Wed, 27 Apr 2022 22:00:44 +0100
Subject: [PATCH 058/127] Adds chapter1/2

---
 chapters/it/_toctree.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/chapters/it/_toctree.yml b/chapters/it/_toctree.yml
index a46bf9993..f6d1fd6c4 100644
--- a/chapters/it/_toctree.yml
+++ b/chapters/it/_toctree.yml
@@ -7,3 +7,5 @@
   sections:
   - local: chapter1/1
     title: Introduzione
+  - local: chapter1/2
+    title: Natural Language Processing

From 7ca7283aed99b80b0f357bad021a527528a5662f Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Wed, 27 Apr 2022 22:02:39 +0100
Subject: [PATCH 059/127] Creates file 1/2

---
 chapters/it/chapter1/2.mdx | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 chapters/it/chapter1/2.mdx

diff --git a/chapters/it/chapter1/2.mdx b/chapters/it/chapter1/2.mdx
new file mode 100644
index 000000000..4e4aecc1a
--- /dev/null
+++ b/chapters/it/chapter1/2.mdx
@@ -0,0 +1,21 @@
+# Natural Language Processing
+
+Before jumping into Transformer models, let's do a quick overview of what natural language processing is and why we care about it.
+
+## What is NLP?
+
+NLP is a field of linguistics and machine learning focused on understanding everything related to human language. The aim of NLP tasks is not only to understand single words individually, but to be able to understand the context of those words.
+
+The following is a list of common NLP tasks, with some examples of each:
+
+- **Classifying whole sentences**: Getting the sentiment of a review, detecting if an email is spam, determining if a sentence is grammatically correct or whether two sentences are logically related or not
+- **Classifying each word in a sentence**: Identifying the grammatical components of a sentence (noun, verb, adjective), or the named entities (person, location, organization)
+- **Generating text content**: Completing a prompt with auto-generated text, filling in the blanks in a text with masked words
+- **Extracting an answer from a text**: Given a question and a context, extracting the answer to the question based on the information provided in the context
+- **Generating a new sentence from an input text**: Translating a text into another language, summarizing a text
+
+NLP isn't limited to written text though. It also tackles complex challenges in speech recognition and computer vision, such as generating a transcript of an audio sample or a description of an image.
+
+## Why is it challenging?
+
+Computers don't process information in the same way as humans. For example, when we read the sentence "I am hungry," we can easily understand its meaning. Similarly, given two sentences such as "I am hungry" and "I am sad," we're able to easily determine how similar they are. For machine learning (ML) models, such tasks are more difficult. The text needs to be processed in a way that enables the model to learn from it. And because language is complex, we need to think carefully about how this processing must be done. There has been a lot of research done on how to represent text, and we will look at some methods in the next chapter.

From 048d8fb857fbf32c7abc5ffe20cfc15d7b94243d Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Wed, 27 Apr 2022 22:11:12 +0100
Subject: [PATCH 060/127] Translates two paragraphs

---
 chapters/it/chapter1/2.mdx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/chapters/it/chapter1/2.mdx b/chapters/it/chapter1/2.mdx
index 4e4aecc1a..3e4e0b561 100644
--- a/chapters/it/chapter1/2.mdx
+++ b/chapters/it/chapter1/2.mdx
@@ -1,10 +1,10 @@
 # Natural Language Processing
 
-Before jumping into Transformer models, let's do a quick overview of what natural language processing is and why we care about it.
+Prima di tuffarci nei modelli Transformer, diamo un'occhiata rapida alla natura del natural language processing (*elaborazione del linguaggio naturale*) e alle ragioni per cui quest'ultimo ci interessa.
 
-## What is NLP?
+## Cosa intendiamo per NLP?
 
-NLP is a field of linguistics and machine learning focused on understanding everything related to human language. The aim of NLP tasks is not only to understand single words individually, but to be able to understand the context of those words.
+NLP è un campo di linguistica e machine learning (*apprendimento automatico*) che si focalizza sulla comprensione di tutto ciò che è legato al linguaggio umano. L'obiettivo dei compiti di NLP non è semplicemente di capire singole parole individualmente, ma anche di capirne il contesto.
 
 The following is a list of common NLP tasks, with some examples of each:
 

From 4dcfa800b10f5a76f81a1c4006e7d2915b39646d Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Wed, 27 Apr 2022 22:29:39 +0100
Subject: [PATCH 061/127] Completes translation of second section

---
 chapters/it/chapter1/2.mdx | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/chapters/it/chapter1/2.mdx b/chapters/it/chapter1/2.mdx
index 3e4e0b561..036b85f6e 100644
--- a/chapters/it/chapter1/2.mdx
+++ b/chapters/it/chapter1/2.mdx
@@ -6,16 +6,16 @@ Prima di tuffarci nei modelli Transformer, diamo un'occhiata rapida alla natura
 
 NLP è un campo di linguistica e machine learning (*apprendimento automatico*) che si focalizza sulla comprensione di tutto ciò che è legato al linguaggio umano. L'obiettivo dei compiti di NLP non è semplicemente di capire singole parole individualmente, ma anche di capirne il contesto.
 
-The following is a list of common NLP tasks, with some examples of each:
+La seguente è una lista dei più comuni compiti di NLP, ognuno accompagnato da esempi:
 
-- **Classifying whole sentences**: Getting the sentiment of a review, detecting if an email is spam, determining if a sentence is grammatically correct or whether two sentences are logically related or not
-- **Classifying each word in a sentence**: Identifying the grammatical components of a sentence (noun, verb, adjective), or the named entities (person, location, organization)
-- **Generating text content**: Completing a prompt with auto-generated text, filling in the blanks in a text with masked words
-- **Extracting an answer from a text**: Given a question and a context, extracting the answer to the question based on the information provided in the context
-- **Generating a new sentence from an input text**: Translating a text into another language, summarizing a text
+- **Classificazione di frasi intere**: Comprendere il tono di una recensione, comprendere se una mail è spam (*spazzatura*), determinare se una frase è grammaticalmente corretta oppure se due frasi hanno un legame logico
+- **Classificazione di parole singole all'interno di una frase**: Identificazione dei componenti grammaticali di una frase (nome, verbo, aggettivo), o di entità denominate (persona, località, organizzazione)
+- **Generazione di contenuto testuale**: Completare un prompt a mezzo di testo auto-generato, colmare spazi vuoti in un testo con parole mascherate
+- **Estrazione di risposte da un testo**: Dati una domanda e un contesto, estrarre la risposta alla domanda sulla base del contesto fornito
+- **Generazione di frasi nuove a partire da un testo input**: Traduzione di un testo in un'altra lingua, riassunto di un testo
 
-NLP isn't limited to written text though. It also tackles complex challenges in speech recognition and computer vision, such as generating a transcript of an audio sample or a description of an image.
+NLP non si limita però ai soli testi scritti, e tratta anche sfide complesse in riconoscimento vocale e computer vision (*elaborazione di dati visuali*), quali la generazione di trascrizioni di campioni audio o la descrizione di immagini.
 
-## Why is it challenging?
+## Perché constituisce una sfida?
 
 Computers don't process information in the same way as humans. For example, when we read the sentence "I am hungry," we can easily understand its meaning. Similarly, given two sentences such as "I am hungry" and "I am sad," we're able to easily determine how similar they are. For machine learning (ML) models, such tasks are more difficult. The text needs to be processed in a way that enables the model to learn from it. And because language is complex, we need to think carefully about how this processing must be done. There has been a lot of research done on how to represent text, and we will look at some methods in the next chapter.

From 0529fddc798211f1d1a93ffa3a7771fe76700c5d Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Wed, 27 Apr 2022 22:37:05 +0100
Subject: [PATCH 062/127] Adds last section

---
 chapters/it/chapter1/2.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/chapter1/2.mdx b/chapters/it/chapter1/2.mdx
index 036b85f6e..de7fbf4ef 100644
--- a/chapters/it/chapter1/2.mdx
+++ b/chapters/it/chapter1/2.mdx
@@ -18,4 +18,4 @@ NLP non si limita però ai soli testi scritti, e tratta anche sfide complesse in
 
 ## Perché constituisce una sfida?
 
-Computers don't process information in the same way as humans. For example, when we read the sentence "I am hungry," we can easily understand its meaning. Similarly, given two sentences such as "I am hungry" and "I am sad," we're able to easily determine how similar they are. For machine learning (ML) models, such tasks are more difficult. The text needs to be processed in a way that enables the model to learn from it. And because language is complex, we need to think carefully about how this processing must be done. There has been a lot of research done on how to represent text, and we will look at some methods in the next chapter.
+I computer non elaborano le informazioni allo stesso modo degli umani. Ad esempio, quando leggiamo la frase "Ho fame," ne capiamo senza difficoltà il senso. Allo stesso modo, date due frasi quali "Ho fame" e "Sono triste," riusciamo facilmente a determinarne il livello di similarità. Per i modelli di machine learning (ML), tali compiti sono più difficili. Il testo deve essere elaborato in un modo che permetta al modello di imparare da esso. E siccome il linguaggio è complesso, il modo in cui l'elaborazione dev'essere svolta va studiato con cura. Molta ricerca è stata fatta su come rappresentare i testi, e nel prossimo capitolo vedremo alcuni di questi metodi.

From b1b60c0ad1b00c53d56c4336ea8a369c032d4f2f Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Wed, 27 Apr 2022 22:40:21 +0100
Subject: [PATCH 063/127] Final version

---
 chapters/it/chapter1/2.mdx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/chapters/it/chapter1/2.mdx b/chapters/it/chapter1/2.mdx
index de7fbf4ef..8e9191d8a 100644
--- a/chapters/it/chapter1/2.mdx
+++ b/chapters/it/chapter1/2.mdx
@@ -8,14 +8,14 @@ NLP è un campo di linguistica e machine learning (*apprendimento automatico*) c
 
 La seguente è una lista dei più comuni compiti di NLP, ognuno accompagnato da esempi:
 
-- **Classificazione di frasi intere**: Comprendere il tono di una recensione, comprendere se una mail è spam (*spazzatura*), determinare se una frase è grammaticalmente corretta oppure se due frasi hanno un legame logico
+- **Classificazione di frasi intere**: Capire il tono di una recensione, stabilire se una mail è spam (*spazzatura*), determinare se una frase è grammaticalmente corretta oppure se due frasi hanno un legame logico
 - **Classificazione di parole singole all'interno di una frase**: Identificazione dei componenti grammaticali di una frase (nome, verbo, aggettivo), o di entità denominate (persona, località, organizzazione)
 - **Generazione di contenuto testuale**: Completare un prompt a mezzo di testo auto-generato, colmare spazi vuoti in un testo con parole mascherate
-- **Estrazione di risposte da un testo**: Dati una domanda e un contesto, estrarre la risposta alla domanda sulla base del contesto fornito
+- **Estrazione di risposte a partire da un testo**: Dati una domanda e un contesto, estrarre la risposta alla domanda sulla base del contesto fornito
 - **Generazione di frasi nuove a partire da un testo input**: Traduzione di un testo in un'altra lingua, riassunto di un testo
 
 NLP non si limita però ai soli testi scritti, e tratta anche sfide complesse in riconoscimento vocale e computer vision (*elaborazione di dati visuali*), quali la generazione di trascrizioni di campioni audio o la descrizione di immagini.
 
 ## Perché constituisce una sfida?
 
-I computer non elaborano le informazioni allo stesso modo degli umani. Ad esempio, quando leggiamo la frase "Ho fame," ne capiamo senza difficoltà il senso. Allo stesso modo, date due frasi quali "Ho fame" e "Sono triste," riusciamo facilmente a determinarne il livello di similarità. Per i modelli di machine learning (ML), tali compiti sono più difficili. Il testo deve essere elaborato in un modo che permetta al modello di imparare da esso. E siccome il linguaggio è complesso, il modo in cui l'elaborazione dev'essere svolta va studiato con cura. Molta ricerca è stata fatta su come rappresentare i testi, e nel prossimo capitolo vedremo alcuni di questi metodi.
+I computer non elaborano le informazioni allo stesso modo degli umani. Ad esempio, quando leggiamo la frase "Ho fame", ne capiamo senza difficoltà il senso. Allo stesso modo, date due frasi quali "Ho fame" e "Sono triste", riusciamo facilmente a determinarne il livello di similarità. Per i modelli di machine learning (ML), tali compiti sono più difficili. Il testo deve essere elaborato in un modo che permetta al modello di imparare da esso. E siccome il linguaggio è complesso, il modo in cui l'elaborazione va svolta dev'essere studiato con cura. Molta ricerca è stata fatta su come rappresentare i testi, e nel prossimo capitolo vedremo alcuni di questi metodi.

From 1191d17dadf94e0bc65d701c2caaa10ec868705e Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Wed, 27 Apr 2022 22:48:36 +0100
Subject: [PATCH 064/127] Adds chapter 1/3

---
 chapters/it/_toctree.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/chapters/it/_toctree.yml b/chapters/it/_toctree.yml
index f6d1fd6c4..57704102a 100644
--- a/chapters/it/_toctree.yml
+++ b/chapters/it/_toctree.yml
@@ -9,3 +9,5 @@
     title: Introduzione
   - local: chapter1/2
     title: Natural Language Processing
+  - local: chapter1/3
+    title: Transformer, per fare cosa?

From 3b224d917d0a6e582be2fdee7c6f94a3765dae2d Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Wed, 27 Apr 2022 22:49:10 +0100
Subject: [PATCH 065/127] Creates file

---
 chapters/it/chapter1/3.mdx | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 chapters/it/chapter1/3.mdx

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/chapters/it/chapter1/3.mdx
@@ -0,0 +1 @@
+

From 80c379462b659a20580d2b1b79bef16e96c1ae94 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Wed, 27 Apr 2022 22:51:47 +0100
Subject: [PATCH 066/127] Updates title

---
 chapters/it/chapter1/3.mdx | 328 +++++++++++++++++++++++++++++++++++++
 1 file changed, 328 insertions(+)

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
index 8b1378917..d09b79d05 100644
--- a/chapters/it/chapter1/3.mdx
+++ b/chapters/it/chapter1/3.mdx
@@ -1 +1,329 @@
+# Transformer, per fare cosa?
 
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter1/section3.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter1/section3.ipynb"},
+]} />
+
+In this section, we will look at what Transformer models can do and use our first tool from the 🤗 Transformers library: the `pipeline()` function.
+
+<Tip>
+👀 See that <em>Open in Colab</em> button on the top right? Click on it to open a Google Colab notebook with all the code samples of this section. This button will be present in any section containing code examples. 
+
+If you want to run the examples locally, we recommend taking a look at the <a href="/course/chapter0">setup</a>.
+</Tip>
+
+## Transformers are everywhere!
+
+Transformer models are used to solve all kinds of NLP tasks, like the ones mentioned in the previous section. Here are some of the companies and organizations using Hugging Face and Transformer models, who also contribute back to the community by sharing their models:
+
+<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/companies.PNG" alt="Companies using Hugging Face" width="100%">
+
+The [🤗 Transformers library](https://github.com/huggingface/transformers) provides the functionality to create and use those shared models. The [Model Hub](https://huggingface.co/models) contains thousands of pretrained models that anyone can download and use. You can also upload your own models to the Hub!
+
+<Tip>
+⚠️ The Hugging Face Hub is not limited to Transformer models. Anyone can share any kind of models or datasets they want! <a href="https://huggingface.co/join">Create a huggingface.co</a> account to benefit from all available features!
+</Tip>
+
+Before diving into how Transformer models work under the hood, let's look at a few examples of how they can be used to solve some interesting NLP problems.
+
+## Working with pipelines
+
+<Youtube id="tiZFewofSLM" />
+
+The most basic object in the 🤗 Transformers library is the `pipeline()` function. It connects a model with its necessary preprocessing and postprocessing steps, allowing us to directly input any text and get an intelligible answer:
+
+```python
+from transformers import pipeline
+
+classifier = pipeline("sentiment-analysis")
+classifier("I've been waiting for a HuggingFace course my whole life.")
+```
+
+```python out
+[{'label': 'POSITIVE', 'score': 0.9598047137260437}]
+```
+
+We can even pass several sentences!
+
+```python
+classifier(
+    ["I've been waiting for a HuggingFace course my whole life.", "I hate this so much!"]
+)
+```
+
+```python out
+[{'label': 'POSITIVE', 'score': 0.9598047137260437},
+ {'label': 'NEGATIVE', 'score': 0.9994558095932007}]
+```
+
+By default, this pipeline selects a particular pretrained model that has been fine-tuned for sentiment analysis in English. The model is downloaded and cached when you create the `classifier` object. If you rerun the command, the cached model will be used instead and there is no need to download the model again.
+
+There are three main steps involved when you pass some text to a pipeline:
+
+1. The text is preprocessed into a format the model can understand.
+2. The preprocessed inputs are passed to the model.
+3. The predictions of the model are post-processed, so you can make sense of them.
+
+
+Some of the currently [available pipelines](https://huggingface.co/transformers/main_classes/pipelines.html) are:
+
+- `feature-extraction` (get the vector representation of a text)
+- `fill-mask`
+- `ner` (named entity recognition)
+- `question-answering`
+- `sentiment-analysis`
+- `summarization`
+- `text-generation`
+- `translation`
+- `zero-shot-classification`
+
+Let's have a look at a few of these!
+
+## Zero-shot classification
+
+We'll start by tackling a more challenging task where we need to classify texts that haven't been labelled. This is a common scenario in real-world projects because annotating text is usually time-consuming and requires domain expertise. For this use case, the `zero-shot-classification` pipeline is very powerful: it allows you to specify which labels to use for the classification, so you don't have to rely on the labels of the pretrained model. You've already seen how the model can classify a sentence as positive or negative using those two labels — but it can also classify the text using any other set of labels you like.
+
+```python
+from transformers import pipeline
+
+classifier = pipeline("zero-shot-classification")
+classifier(
+    "This is a course about the Transformers library",
+    candidate_labels=["education", "politics", "business"],
+)
+```
+
+```python out
+{'sequence': 'This is a course about the Transformers library',
+ 'labels': ['education', 'business', 'politics'],
+ 'scores': [0.8445963859558105, 0.111976258456707, 0.043427448719739914]}
+```
+
+This pipeline is called _zero-shot_ because you don't need to fine-tune the model on your data to use it. It can directly return probability scores for any list of labels you want!
+
+<Tip>
+
+✏️ **Try it out!** Play around with your own sequences and labels and see how the model behaves.
+
+</Tip>
+
+
+## Text generation
+
+Now let's see how to use a pipeline to generate some text. The main idea here is that you provide a prompt and the model will auto-complete it by generating the remaining text. This is similar to the predictive text feature that is found on many phones. Text generation involves randomness, so it's normal if you don't get the same results as shown below.
+
+```python
+from transformers import pipeline
+
+generator = pipeline("text-generation")
+generator("In this course, we will teach you how to")
+```
+
+```python out
+[{'generated_text': 'In this course, we will teach you how to understand and use '
+                    'data flow and data interchange when handling user data. We '
+                    'will be working with one or more of the most commonly used '
+                    'data flows — data flows of various types, as seen by the '
+                    'HTTP'}]
+```
+
+You can control how many different sequences are generated with the argument `num_return_sequences` and the total length of the output text with the argument `max_length`.
+
+<Tip>
+
+✏️ **Try it out!** Use the `num_return_sequences` and `max_length` arguments to generate two sentences of 15 words each.
+
+</Tip>
+
+
+## Using any model from the Hub in a pipeline
+
+The previous examples used the default model for the task at hand, but you can also choose a particular model from the Hub to use in a pipeline for a specific task — say, text generation. Go to the [Model Hub](https://huggingface.co/models) and click on the corresponding tag on the left to display only the supported models for that task. You should get to a page like [this one](https://huggingface.co/models?pipeline_tag=text-generation).
+
+Let's try the [`distilgpt2`](https://huggingface.co/distilgpt2) model! Here's how to load it in the same pipeline as before:
+
+```python
+from transformers import pipeline
+
+generator = pipeline("text-generation", model="distilgpt2")
+generator(
+    "In this course, we will teach you how to",
+    max_length=30,
+    num_return_sequences=2,
+)
+```
+
+```python out
+[{'generated_text': 'In this course, we will teach you how to manipulate the world and '
+                    'move your mental and physical capabilities to your advantage.'},
+ {'generated_text': 'In this course, we will teach you how to become an expert and '
+                    'practice realtime, and with a hands on experience on both real '
+                    'time and real'}]
+```
+
+You can refine your search for a model by clicking on the language tags, and pick a model that will generate text in another language. The Model Hub even contains checkpoints for multilingual models that support several languages.
+
+Once you select a model by clicking on it, you'll see that there is a widget enabling you to try it directly online. This way you can quickly test the model's capabilities before downloading it.
+
+<Tip>
+
+✏️ **Try it out!** Use the filters to find a text generation model for another language. Feel free to play with the widget and use it in a pipeline!
+
+</Tip>
+
+### The Inference API
+
+All the models can be tested directly through your browser using the Inference API, which is available on the Hugging Face [website](https://huggingface.co/). You can play with the model directly on this page by inputting custom text and watching the model process the input data.
+
+The Inference API that powers the widget is also available as a paid product, which comes in handy if you need it for your workflows. See the [pricing page](https://huggingface.co/pricing) for more details.
+
+## Mask filling
+
+The next pipeline you'll try is `fill-mask`. The idea of this task is to fill in the blanks in a given text:
+
+```python
+from transformers import pipeline
+
+unmasker = pipeline("fill-mask")
+unmasker("This course will teach you all about <mask> models.", top_k=2)
+```
+
+```python out
+[{'sequence': 'This course will teach you all about mathematical models.',
+  'score': 0.19619831442832947,
+  'token': 30412,
+  'token_str': ' mathematical'},
+ {'sequence': 'This course will teach you all about computational models.',
+  'score': 0.04052725434303284,
+  'token': 38163,
+  'token_str': ' computational'}]
+```
+
+The `top_k` argument controls how many possibilities you want to be displayed. Note that here the model fills in the special `<mask>` word, which is often referred to as a *mask token*. Other mask-filling models might have different mask tokens, so it's always good to verify the proper mask word when exploring other models. One way to check it is by looking at the mask word used in the widget.
+
+<Tip>
+
+✏️ **Try it out!** Search for the `bert-base-cased` model on the Hub and identify its mask word in the Inference API widget. What does this model predict for the sentence in our `pipeline` example above?
+
+</Tip>
+
+## Named entity recognition
+
+Named entity recognition (NER) is a task where the model has to find which parts of the input text correspond to entities such as persons, locations, or organizations. Let's look at an example:
+
+```python
+from transformers import pipeline
+
+ner = pipeline("ner", grouped_entities=True)
+ner("My name is Sylvain and I work at Hugging Face in Brooklyn.")
+```
+
+```python out
+[{'entity_group': 'PER', 'score': 0.99816, 'word': 'Sylvain', 'start': 11, 'end': 18}, 
+ {'entity_group': 'ORG', 'score': 0.97960, 'word': 'Hugging Face', 'start': 33, 'end': 45}, 
+ {'entity_group': 'LOC', 'score': 0.99321, 'word': 'Brooklyn', 'start': 49, 'end': 57}
+]
+```
+
+Here the model correctly identified that Sylvain is a person (PER), Hugging Face an organization (ORG), and Brooklyn a location (LOC).
+
+We pass the option `grouped_entities=True` in the pipeline creation function to tell the pipeline to regroup together the parts of the sentence that correspond to the same entity: here the model correctly grouped "Hugging" and "Face" as a single organization, even though the name consists of multiple words. In fact, as we will see in the next chapter, the preprocessing even splits some words into smaller parts. For instance, `Sylvain` is split into four pieces: `S`, `##yl`, `##va`, and `##in`. In the post-processing step, the pipeline successfully regrouped those pieces.
+
+<Tip>
+
+✏️ **Try it out!** Search the Model Hub for a model able to do part-of-speech tagging (usually abbreviated as POS) in English. What does this model predict for the sentence in the example above?
+
+</Tip>
+
+## Question answering
+
+The `question-answering` pipeline answers questions using information from a given context:
+
+```python
+from transformers import pipeline
+
+question_answerer = pipeline("question-answering")
+question_answerer(
+    question="Where do I work?",
+    context="My name is Sylvain and I work at Hugging Face in Brooklyn",
+)
+```
+
+```python out
+{'score': 0.6385916471481323, 'start': 33, 'end': 45, 'answer': 'Hugging Face'}
+```
+
+Note that this pipeline works by extracting information from the provided context; it does not generate the answer.
+
+## Summarization
+
+Summarization is the task of reducing a text into a shorter text while keeping all (or most) of the important aspects referenced in the text. Here's an example:
+
+```python
+from transformers import pipeline
+
+summarizer = pipeline("summarization")
+summarizer(
+    """
+    America has changed dramatically during recent years. Not only has the number of 
+    graduates in traditional engineering disciplines such as mechanical, civil, 
+    electrical, chemical, and aeronautical engineering declined, but in most of 
+    the premier American universities engineering curricula now concentrate on 
+    and encourage largely the study of engineering science. As a result, there 
+    are declining offerings in engineering subjects dealing with infrastructure, 
+    the environment, and related issues, and greater concentration on high 
+    technology subjects, largely supporting increasingly complex scientific 
+    developments. While the latter is important, it should not be at the expense 
+    of more traditional engineering.
+
+    Rapidly developing economies such as China and India, as well as other 
+    industrial countries in Europe and Asia, continue to encourage and advance 
+    the teaching of engineering. Both China and India, respectively, graduate 
+    six and eight times as many traditional engineers as does the United States. 
+    Other industrial countries at minimum maintain their output, while America 
+    suffers an increasingly serious decline in the number of engineering graduates 
+    and a lack of well-educated engineers.
+"""
+)
+```
+
+```python out
+[{'summary_text': ' America has changed dramatically during recent years . The '
+                  'number of engineering graduates in the U.S. has declined in '
+                  'traditional engineering disciplines such as mechanical, civil '
+                  ', electrical, chemical, and aeronautical engineering . Rapidly '
+                  'developing economies such as China and India, as well as other '
+                  'industrial countries in Europe and Asia, continue to encourage '
+                  'and advance engineering .'}]
+```
+
+Like with text generation, you can specify a `max_length` or a `min_length` for the result.
+
+
+## Translation
+
+For translation, you can use a default model if you provide a language pair in the task name (such as `"translation_en_to_fr"`), but the easiest way is to pick the model you want to use on the [Model Hub](https://huggingface.co/models). Here we'll try translating from French to English:
+
+```python
+from transformers import pipeline
+
+translator = pipeline("translation", model="Helsinki-NLP/opus-mt-fr-en")
+translator("Ce cours est produit par Hugging Face.")
+```
+
+```python out
+[{'translation_text': 'This course is produced by Hugging Face.'}]
+```
+
+Like with text generation and summarization, you can specify a `max_length` or a `min_length` for the result.
+
+<Tip>
+
+✏️ **Try it out!** Search for translation models in other languages and try to translate the previous sentence into a few different languages.
+
+</Tip>
+
+The pipelines shown so far are mostly for demonstrative purposes. They were programmed for specific tasks and cannot perform variations of them. In the next chapter, you'll learn what's inside a `pipeline()` function and how to customize its behavior.

From 762d3b214e7024484c2c24910f7f10852a2691dc Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Wed, 27 Apr 2022 22:52:24 +0100
Subject: [PATCH 067/127] Translates title of chapter1/3

---
 chapters/it/_toctree.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/_toctree.yml b/chapters/it/_toctree.yml
index 57704102a..0663740fe 100644
--- a/chapters/it/_toctree.yml
+++ b/chapters/it/_toctree.yml
@@ -10,4 +10,4 @@
   - local: chapter1/2
     title: Natural Language Processing
   - local: chapter1/3
-    title: Transformers, what can they do?
+    title: Transformer, per fare cosa?

From d6f1327594f9892c7a1a16b7013621ee9330ac24 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 28 Apr 2022 09:21:50 +0100
Subject: [PATCH 068/127] Deletes chapter1/3

---
 chapters/it/_toctree.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/chapters/it/_toctree.yml b/chapters/it/_toctree.yml
index 0663740fe..f6d1fd6c4 100644
--- a/chapters/it/_toctree.yml
+++ b/chapters/it/_toctree.yml
@@ -9,5 +9,3 @@
     title: Introduzione
   - local: chapter1/2
     title: Natural Language Processing
-  - local: chapter1/3
-    title: Transformer, per fare cosa?

From c7f47167249f99df3af992b98ccd33f42df618be Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 28 Apr 2022 09:22:13 +0100
Subject: [PATCH 069/127] Deletes 3.mdx

---
 chapters/it/chapter1/3.mdx | 329 -------------------------------------
 1 file changed, 329 deletions(-)
 delete mode 100644 chapters/it/chapter1/3.mdx

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
deleted file mode 100644
index d09b79d05..000000000
--- a/chapters/it/chapter1/3.mdx
+++ /dev/null
@@ -1,329 +0,0 @@
-# Transformer, per fare cosa?
-
-<DocNotebookDropdown
-  classNames="absolute z-10 right-0 top-0"
-  options={[
-    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter1/section3.ipynb"},
-    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter1/section3.ipynb"},
-]} />
-
-In this section, we will look at what Transformer models can do and use our first tool from the 🤗 Transformers library: the `pipeline()` function.
-
-<Tip>
-👀 See that <em>Open in Colab</em> button on the top right? Click on it to open a Google Colab notebook with all the code samples of this section. This button will be present in any section containing code examples. 
-
-If you want to run the examples locally, we recommend taking a look at the <a href="/course/chapter0">setup</a>.
-</Tip>
-
-## Transformers are everywhere!
-
-Transformer models are used to solve all kinds of NLP tasks, like the ones mentioned in the previous section. Here are some of the companies and organizations using Hugging Face and Transformer models, who also contribute back to the community by sharing their models:
-
-<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/companies.PNG" alt="Companies using Hugging Face" width="100%">
-
-The [🤗 Transformers library](https://github.com/huggingface/transformers) provides the functionality to create and use those shared models. The [Model Hub](https://huggingface.co/models) contains thousands of pretrained models that anyone can download and use. You can also upload your own models to the Hub!
-
-<Tip>
-⚠️ The Hugging Face Hub is not limited to Transformer models. Anyone can share any kind of models or datasets they want! <a href="https://huggingface.co/join">Create a huggingface.co</a> account to benefit from all available features!
-</Tip>
-
-Before diving into how Transformer models work under the hood, let's look at a few examples of how they can be used to solve some interesting NLP problems.
-
-## Working with pipelines
-
-<Youtube id="tiZFewofSLM" />
-
-The most basic object in the 🤗 Transformers library is the `pipeline()` function. It connects a model with its necessary preprocessing and postprocessing steps, allowing us to directly input any text and get an intelligible answer:
-
-```python
-from transformers import pipeline
-
-classifier = pipeline("sentiment-analysis")
-classifier("I've been waiting for a HuggingFace course my whole life.")
-```
-
-```python out
-[{'label': 'POSITIVE', 'score': 0.9598047137260437}]
-```
-
-We can even pass several sentences!
-
-```python
-classifier(
-    ["I've been waiting for a HuggingFace course my whole life.", "I hate this so much!"]
-)
-```
-
-```python out
-[{'label': 'POSITIVE', 'score': 0.9598047137260437},
- {'label': 'NEGATIVE', 'score': 0.9994558095932007}]
-```
-
-By default, this pipeline selects a particular pretrained model that has been fine-tuned for sentiment analysis in English. The model is downloaded and cached when you create the `classifier` object. If you rerun the command, the cached model will be used instead and there is no need to download the model again.
-
-There are three main steps involved when you pass some text to a pipeline:
-
-1. The text is preprocessed into a format the model can understand.
-2. The preprocessed inputs are passed to the model.
-3. The predictions of the model are post-processed, so you can make sense of them.
-
-
-Some of the currently [available pipelines](https://huggingface.co/transformers/main_classes/pipelines.html) are:
-
-- `feature-extraction` (get the vector representation of a text)
-- `fill-mask`
-- `ner` (named entity recognition)
-- `question-answering`
-- `sentiment-analysis`
-- `summarization`
-- `text-generation`
-- `translation`
-- `zero-shot-classification`
-
-Let's have a look at a few of these!
-
-## Zero-shot classification
-
-We'll start by tackling a more challenging task where we need to classify texts that haven't been labelled. This is a common scenario in real-world projects because annotating text is usually time-consuming and requires domain expertise. For this use case, the `zero-shot-classification` pipeline is very powerful: it allows you to specify which labels to use for the classification, so you don't have to rely on the labels of the pretrained model. You've already seen how the model can classify a sentence as positive or negative using those two labels — but it can also classify the text using any other set of labels you like.
-
-```python
-from transformers import pipeline
-
-classifier = pipeline("zero-shot-classification")
-classifier(
-    "This is a course about the Transformers library",
-    candidate_labels=["education", "politics", "business"],
-)
-```
-
-```python out
-{'sequence': 'This is a course about the Transformers library',
- 'labels': ['education', 'business', 'politics'],
- 'scores': [0.8445963859558105, 0.111976258456707, 0.043427448719739914]}
-```
-
-This pipeline is called _zero-shot_ because you don't need to fine-tune the model on your data to use it. It can directly return probability scores for any list of labels you want!
-
-<Tip>
-
-✏️ **Try it out!** Play around with your own sequences and labels and see how the model behaves.
-
-</Tip>
-
-
-## Text generation
-
-Now let's see how to use a pipeline to generate some text. The main idea here is that you provide a prompt and the model will auto-complete it by generating the remaining text. This is similar to the predictive text feature that is found on many phones. Text generation involves randomness, so it's normal if you don't get the same results as shown below.
-
-```python
-from transformers import pipeline
-
-generator = pipeline("text-generation")
-generator("In this course, we will teach you how to")
-```
-
-```python out
-[{'generated_text': 'In this course, we will teach you how to understand and use '
-                    'data flow and data interchange when handling user data. We '
-                    'will be working with one or more of the most commonly used '
-                    'data flows — data flows of various types, as seen by the '
-                    'HTTP'}]
-```
-
-You can control how many different sequences are generated with the argument `num_return_sequences` and the total length of the output text with the argument `max_length`.
-
-<Tip>
-
-✏️ **Try it out!** Use the `num_return_sequences` and `max_length` arguments to generate two sentences of 15 words each.
-
-</Tip>
-
-
-## Using any model from the Hub in a pipeline
-
-The previous examples used the default model for the task at hand, but you can also choose a particular model from the Hub to use in a pipeline for a specific task — say, text generation. Go to the [Model Hub](https://huggingface.co/models) and click on the corresponding tag on the left to display only the supported models for that task. You should get to a page like [this one](https://huggingface.co/models?pipeline_tag=text-generation).
-
-Let's try the [`distilgpt2`](https://huggingface.co/distilgpt2) model! Here's how to load it in the same pipeline as before:
-
-```python
-from transformers import pipeline
-
-generator = pipeline("text-generation", model="distilgpt2")
-generator(
-    "In this course, we will teach you how to",
-    max_length=30,
-    num_return_sequences=2,
-)
-```
-
-```python out
-[{'generated_text': 'In this course, we will teach you how to manipulate the world and '
-                    'move your mental and physical capabilities to your advantage.'},
- {'generated_text': 'In this course, we will teach you how to become an expert and '
-                    'practice realtime, and with a hands on experience on both real '
-                    'time and real'}]
-```
-
-You can refine your search for a model by clicking on the language tags, and pick a model that will generate text in another language. The Model Hub even contains checkpoints for multilingual models that support several languages.
-
-Once you select a model by clicking on it, you'll see that there is a widget enabling you to try it directly online. This way you can quickly test the model's capabilities before downloading it.
-
-<Tip>
-
-✏️ **Try it out!** Use the filters to find a text generation model for another language. Feel free to play with the widget and use it in a pipeline!
-
-</Tip>
-
-### The Inference API
-
-All the models can be tested directly through your browser using the Inference API, which is available on the Hugging Face [website](https://huggingface.co/). You can play with the model directly on this page by inputting custom text and watching the model process the input data.
-
-The Inference API that powers the widget is also available as a paid product, which comes in handy if you need it for your workflows. See the [pricing page](https://huggingface.co/pricing) for more details.
-
-## Mask filling
-
-The next pipeline you'll try is `fill-mask`. The idea of this task is to fill in the blanks in a given text:
-
-```python
-from transformers import pipeline
-
-unmasker = pipeline("fill-mask")
-unmasker("This course will teach you all about <mask> models.", top_k=2)
-```
-
-```python out
-[{'sequence': 'This course will teach you all about mathematical models.',
-  'score': 0.19619831442832947,
-  'token': 30412,
-  'token_str': ' mathematical'},
- {'sequence': 'This course will teach you all about computational models.',
-  'score': 0.04052725434303284,
-  'token': 38163,
-  'token_str': ' computational'}]
-```
-
-The `top_k` argument controls how many possibilities you want to be displayed. Note that here the model fills in the special `<mask>` word, which is often referred to as a *mask token*. Other mask-filling models might have different mask tokens, so it's always good to verify the proper mask word when exploring other models. One way to check it is by looking at the mask word used in the widget.
-
-<Tip>
-
-✏️ **Try it out!** Search for the `bert-base-cased` model on the Hub and identify its mask word in the Inference API widget. What does this model predict for the sentence in our `pipeline` example above?
-
-</Tip>
-
-## Named entity recognition
-
-Named entity recognition (NER) is a task where the model has to find which parts of the input text correspond to entities such as persons, locations, or organizations. Let's look at an example:
-
-```python
-from transformers import pipeline
-
-ner = pipeline("ner", grouped_entities=True)
-ner("My name is Sylvain and I work at Hugging Face in Brooklyn.")
-```
-
-```python out
-[{'entity_group': 'PER', 'score': 0.99816, 'word': 'Sylvain', 'start': 11, 'end': 18}, 
- {'entity_group': 'ORG', 'score': 0.97960, 'word': 'Hugging Face', 'start': 33, 'end': 45}, 
- {'entity_group': 'LOC', 'score': 0.99321, 'word': 'Brooklyn', 'start': 49, 'end': 57}
-]
-```
-
-Here the model correctly identified that Sylvain is a person (PER), Hugging Face an organization (ORG), and Brooklyn a location (LOC).
-
-We pass the option `grouped_entities=True` in the pipeline creation function to tell the pipeline to regroup together the parts of the sentence that correspond to the same entity: here the model correctly grouped "Hugging" and "Face" as a single organization, even though the name consists of multiple words. In fact, as we will see in the next chapter, the preprocessing even splits some words into smaller parts. For instance, `Sylvain` is split into four pieces: `S`, `##yl`, `##va`, and `##in`. In the post-processing step, the pipeline successfully regrouped those pieces.
-
-<Tip>
-
-✏️ **Try it out!** Search the Model Hub for a model able to do part-of-speech tagging (usually abbreviated as POS) in English. What does this model predict for the sentence in the example above?
-
-</Tip>
-
-## Question answering
-
-The `question-answering` pipeline answers questions using information from a given context:
-
-```python
-from transformers import pipeline
-
-question_answerer = pipeline("question-answering")
-question_answerer(
-    question="Where do I work?",
-    context="My name is Sylvain and I work at Hugging Face in Brooklyn",
-)
-```
-
-```python out
-{'score': 0.6385916471481323, 'start': 33, 'end': 45, 'answer': 'Hugging Face'}
-```
-
-Note that this pipeline works by extracting information from the provided context; it does not generate the answer.
-
-## Summarization
-
-Summarization is the task of reducing a text into a shorter text while keeping all (or most) of the important aspects referenced in the text. Here's an example:
-
-```python
-from transformers import pipeline
-
-summarizer = pipeline("summarization")
-summarizer(
-    """
-    America has changed dramatically during recent years. Not only has the number of 
-    graduates in traditional engineering disciplines such as mechanical, civil, 
-    electrical, chemical, and aeronautical engineering declined, but in most of 
-    the premier American universities engineering curricula now concentrate on 
-    and encourage largely the study of engineering science. As a result, there 
-    are declining offerings in engineering subjects dealing with infrastructure, 
-    the environment, and related issues, and greater concentration on high 
-    technology subjects, largely supporting increasingly complex scientific 
-    developments. While the latter is important, it should not be at the expense 
-    of more traditional engineering.
-
-    Rapidly developing economies such as China and India, as well as other 
-    industrial countries in Europe and Asia, continue to encourage and advance 
-    the teaching of engineering. Both China and India, respectively, graduate 
-    six and eight times as many traditional engineers as does the United States. 
-    Other industrial countries at minimum maintain their output, while America 
-    suffers an increasingly serious decline in the number of engineering graduates 
-    and a lack of well-educated engineers.
-"""
-)
-```
-
-```python out
-[{'summary_text': ' America has changed dramatically during recent years . The '
-                  'number of engineering graduates in the U.S. has declined in '
-                  'traditional engineering disciplines such as mechanical, civil '
-                  ', electrical, chemical, and aeronautical engineering . Rapidly '
-                  'developing economies such as China and India, as well as other '
-                  'industrial countries in Europe and Asia, continue to encourage '
-                  'and advance engineering .'}]
-```
-
-Like with text generation, you can specify a `max_length` or a `min_length` for the result.
-
-
-## Translation
-
-For translation, you can use a default model if you provide a language pair in the task name (such as `"translation_en_to_fr"`), but the easiest way is to pick the model you want to use on the [Model Hub](https://huggingface.co/models). Here we'll try translating from French to English:
-
-```python
-from transformers import pipeline
-
-translator = pipeline("translation", model="Helsinki-NLP/opus-mt-fr-en")
-translator("Ce cours est produit par Hugging Face.")
-```
-
-```python out
-[{'translation_text': 'This course is produced by Hugging Face.'}]
-```
-
-Like with text generation and summarization, you can specify a `max_length` or a `min_length` for the result.
-
-<Tip>
-
-✏️ **Try it out!** Search for translation models in other languages and try to translate the previous sentence into a few different languages.
-
-</Tip>
-
-The pipelines shown so far are mostly for demonstrative purposes. They were programmed for specific tasks and cannot perform variations of them. In the next chapter, you'll learn what's inside a `pipeline()` function and how to customize its behavior.

From a87f26b715fc4486214f244ac1ffe6de1dc3a4c7 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 28 Apr 2022 15:26:34 +0100
Subject: [PATCH 070/127] Adds chapter 1/3

---
 chapters/it/_toctree.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/chapters/it/_toctree.yml b/chapters/it/_toctree.yml
index f6d1fd6c4..9870e59a6 100644
--- a/chapters/it/_toctree.yml
+++ b/chapters/it/_toctree.yml
@@ -9,3 +9,5 @@
     title: Introduzione
   - local: chapter1/2
     title: Natural Language Processing
+  - local: chapter1/3
+    title: Cosa sanno fare i Transformer?

From 9c883217e500cf112f8417fae8d7a96720e09099 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 28 Apr 2022 15:28:05 +0100
Subject: [PATCH 071/127] Creates 3.mdx

---
 chapters/it/chapter1/3.mdx | 329 +++++++++++++++++++++++++++++++++++++
 1 file changed, 329 insertions(+)
 create mode 100644 chapters/it/chapter1/3.mdx

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
new file mode 100644
index 000000000..ac22e7e8f
--- /dev/null
+++ b/chapters/it/chapter1/3.mdx
@@ -0,0 +1,329 @@
+# Transformers, what can they do?
+
+<DocNotebookDropdown
+  classNames="absolute z-10 right-0 top-0"
+  options={[
+    {label: "Google Colab", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter1/section3.ipynb"},
+    {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter1/section3.ipynb"},
+]} />
+
+In this section, we will look at what Transformer models can do and use our first tool from the 🤗 Transformers library: the `pipeline()` function.
+
+<Tip>
+👀 See that <em>Open in Colab</em> button on the top right? Click on it to open a Google Colab notebook with all the code samples of this section. This button will be present in any section containing code examples. 
+
+If you want to run the examples locally, we recommend taking a look at the <a href="/course/chapter0">setup</a>.
+</Tip>
+
+## Transformers are everywhere!
+
+Transformer models are used to solve all kinds of NLP tasks, like the ones mentioned in the previous section. Here are some of the companies and organizations using Hugging Face and Transformer models, who also contribute back to the community by sharing their models:
+
+<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/companies.PNG" alt="Companies using Hugging Face" width="100%">
+
+The [🤗 Transformers library](https://github.com/huggingface/transformers) provides the functionality to create and use those shared models. The [Model Hub](https://huggingface.co/models) contains thousands of pretrained models that anyone can download and use. You can also upload your own models to the Hub!
+
+<Tip>
+⚠️ The Hugging Face Hub is not limited to Transformer models. Anyone can share any kind of models or datasets they want! <a href="https://huggingface.co/join">Create a huggingface.co</a> account to benefit from all available features!
+</Tip>
+
+Before diving into how Transformer models work under the hood, let's look at a few examples of how they can be used to solve some interesting NLP problems.
+
+## Working with pipelines
+
+<Youtube id="tiZFewofSLM" />
+
+The most basic object in the 🤗 Transformers library is the `pipeline()` function. It connects a model with its necessary preprocessing and postprocessing steps, allowing us to directly input any text and get an intelligible answer:
+
+```python
+from transformers import pipeline
+
+classifier = pipeline("sentiment-analysis")
+classifier("I've been waiting for a HuggingFace course my whole life.")
+```
+
+```python out
+[{'label': 'POSITIVE', 'score': 0.9598047137260437}]
+```
+
+We can even pass several sentences!
+
+```python
+classifier(
+    ["I've been waiting for a HuggingFace course my whole life.", "I hate this so much!"]
+)
+```
+
+```python out
+[{'label': 'POSITIVE', 'score': 0.9598047137260437},
+ {'label': 'NEGATIVE', 'score': 0.9994558095932007}]
+```
+
+By default, this pipeline selects a particular pretrained model that has been fine-tuned for sentiment analysis in English. The model is downloaded and cached when you create the `classifier` object. If you rerun the command, the cached model will be used instead and there is no need to download the model again.
+
+There are three main steps involved when you pass some text to a pipeline:
+
+1. The text is preprocessed into a format the model can understand.
+2. The preprocessed inputs are passed to the model.
+3. The predictions of the model are post-processed, so you can make sense of them.
+
+
+Some of the currently [available pipelines](https://huggingface.co/transformers/main_classes/pipelines.html) are:
+
+- `feature-extraction` (get the vector representation of a text)
+- `fill-mask`
+- `ner` (named entity recognition)
+- `question-answering`
+- `sentiment-analysis`
+- `summarization`
+- `text-generation`
+- `translation`
+- `zero-shot-classification`
+
+Let's have a look at a few of these!
+
+## Zero-shot classification
+
+We'll start by tackling a more challenging task where we need to classify texts that haven't been labelled. This is a common scenario in real-world projects because annotating text is usually time-consuming and requires domain expertise. For this use case, the `zero-shot-classification` pipeline is very powerful: it allows you to specify which labels to use for the classification, so you don't have to rely on the labels of the pretrained model. You've already seen how the model can classify a sentence as positive or negative using those two labels — but it can also classify the text using any other set of labels you like.
+
+```python
+from transformers import pipeline
+
+classifier = pipeline("zero-shot-classification")
+classifier(
+    "This is a course about the Transformers library",
+    candidate_labels=["education", "politics", "business"],
+)
+```
+
+```python out
+{'sequence': 'This is a course about the Transformers library',
+ 'labels': ['education', 'business', 'politics'],
+ 'scores': [0.8445963859558105, 0.111976258456707, 0.043427448719739914]}
+```
+
+This pipeline is called _zero-shot_ because you don't need to fine-tune the model on your data to use it. It can directly return probability scores for any list of labels you want!
+
+<Tip>
+
+✏️ **Try it out!** Play around with your own sequences and labels and see how the model behaves.
+
+</Tip>
+
+
+## Text generation
+
+Now let's see how to use a pipeline to generate some text. The main idea here is that you provide a prompt and the model will auto-complete it by generating the remaining text. This is similar to the predictive text feature that is found on many phones. Text generation involves randomness, so it's normal if you don't get the same results as shown below.
+
+```python
+from transformers import pipeline
+
+generator = pipeline("text-generation")
+generator("In this course, we will teach you how to")
+```
+
+```python out
+[{'generated_text': 'In this course, we will teach you how to understand and use '
+                    'data flow and data interchange when handling user data. We '
+                    'will be working with one or more of the most commonly used '
+                    'data flows — data flows of various types, as seen by the '
+                    'HTTP'}]
+```
+
+You can control how many different sequences are generated with the argument `num_return_sequences` and the total length of the output text with the argument `max_length`.
+
+<Tip>
+
+✏️ **Try it out!** Use the `num_return_sequences` and `max_length` arguments to generate two sentences of 15 words each.
+
+</Tip>
+
+
+## Using any model from the Hub in a pipeline
+
+The previous examples used the default model for the task at hand, but you can also choose a particular model from the Hub to use in a pipeline for a specific task — say, text generation. Go to the [Model Hub](https://huggingface.co/models) and click on the corresponding tag on the left to display only the supported models for that task. You should get to a page like [this one](https://huggingface.co/models?pipeline_tag=text-generation).
+
+Let's try the [`distilgpt2`](https://huggingface.co/distilgpt2) model! Here's how to load it in the same pipeline as before:
+
+```python
+from transformers import pipeline
+
+generator = pipeline("text-generation", model="distilgpt2")
+generator(
+    "In this course, we will teach you how to",
+    max_length=30,
+    num_return_sequences=2,
+)
+```
+
+```python out
+[{'generated_text': 'In this course, we will teach you how to manipulate the world and '
+                    'move your mental and physical capabilities to your advantage.'},
+ {'generated_text': 'In this course, we will teach you how to become an expert and '
+                    'practice realtime, and with a hands on experience on both real '
+                    'time and real'}]
+```
+
+You can refine your search for a model by clicking on the language tags, and pick a model that will generate text in another language. The Model Hub even contains checkpoints for multilingual models that support several languages.
+
+Once you select a model by clicking on it, you'll see that there is a widget enabling you to try it directly online. This way you can quickly test the model's capabilities before downloading it.
+
+<Tip>
+
+✏️ **Try it out!** Use the filters to find a text generation model for another language. Feel free to play with the widget and use it in a pipeline!
+
+</Tip>
+
+### The Inference API
+
+All the models can be tested directly through your browser using the Inference API, which is available on the Hugging Face [website](https://huggingface.co/). You can play with the model directly on this page by inputting custom text and watching the model process the input data.
+
+The Inference API that powers the widget is also available as a paid product, which comes in handy if you need it for your workflows. See the [pricing page](https://huggingface.co/pricing) for more details.
+
+## Mask filling
+
+The next pipeline you'll try is `fill-mask`. The idea of this task is to fill in the blanks in a given text:
+
+```python
+from transformers import pipeline
+
+unmasker = pipeline("fill-mask")
+unmasker("This course will teach you all about <mask> models.", top_k=2)
+```
+
+```python out
+[{'sequence': 'This course will teach you all about mathematical models.',
+  'score': 0.19619831442832947,
+  'token': 30412,
+  'token_str': ' mathematical'},
+ {'sequence': 'This course will teach you all about computational models.',
+  'score': 0.04052725434303284,
+  'token': 38163,
+  'token_str': ' computational'}]
+```
+
+The `top_k` argument controls how many possibilities you want to be displayed. Note that here the model fills in the special `<mask>` word, which is often referred to as a *mask token*. Other mask-filling models might have different mask tokens, so it's always good to verify the proper mask word when exploring other models. One way to check it is by looking at the mask word used in the widget.
+
+<Tip>
+
+✏️ **Try it out!** Search for the `bert-base-cased` model on the Hub and identify its mask word in the Inference API widget. What does this model predict for the sentence in our `pipeline` example above?
+
+</Tip>
+
+## Named entity recognition
+
+Named entity recognition (NER) is a task where the model has to find which parts of the input text correspond to entities such as persons, locations, or organizations. Let's look at an example:
+
+```python
+from transformers import pipeline
+
+ner = pipeline("ner", grouped_entities=True)
+ner("My name is Sylvain and I work at Hugging Face in Brooklyn.")
+```
+
+```python out
+[{'entity_group': 'PER', 'score': 0.99816, 'word': 'Sylvain', 'start': 11, 'end': 18}, 
+ {'entity_group': 'ORG', 'score': 0.97960, 'word': 'Hugging Face', 'start': 33, 'end': 45}, 
+ {'entity_group': 'LOC', 'score': 0.99321, 'word': 'Brooklyn', 'start': 49, 'end': 57}
+]
+```
+
+Here the model correctly identified that Sylvain is a person (PER), Hugging Face an organization (ORG), and Brooklyn a location (LOC).
+
+We pass the option `grouped_entities=True` in the pipeline creation function to tell the pipeline to regroup together the parts of the sentence that correspond to the same entity: here the model correctly grouped "Hugging" and "Face" as a single organization, even though the name consists of multiple words. In fact, as we will see in the next chapter, the preprocessing even splits some words into smaller parts. For instance, `Sylvain` is split into four pieces: `S`, `##yl`, `##va`, and `##in`. In the post-processing step, the pipeline successfully regrouped those pieces.
+
+<Tip>
+
+✏️ **Try it out!** Search the Model Hub for a model able to do part-of-speech tagging (usually abbreviated as POS) in English. What does this model predict for the sentence in the example above?
+
+</Tip>
+
+## Question answering
+
+The `question-answering` pipeline answers questions using information from a given context:
+
+```python
+from transformers import pipeline
+
+question_answerer = pipeline("question-answering")
+question_answerer(
+    question="Where do I work?",
+    context="My name is Sylvain and I work at Hugging Face in Brooklyn",
+)
+```
+
+```python out
+{'score': 0.6385916471481323, 'start': 33, 'end': 45, 'answer': 'Hugging Face'}
+```
+
+Note that this pipeline works by extracting information from the provided context; it does not generate the answer.
+
+## Summarization
+
+Summarization is the task of reducing a text into a shorter text while keeping all (or most) of the important aspects referenced in the text. Here's an example:
+
+```python
+from transformers import pipeline
+
+summarizer = pipeline("summarization")
+summarizer(
+    """
+    America has changed dramatically during recent years. Not only has the number of 
+    graduates in traditional engineering disciplines such as mechanical, civil, 
+    electrical, chemical, and aeronautical engineering declined, but in most of 
+    the premier American universities engineering curricula now concentrate on 
+    and encourage largely the study of engineering science. As a result, there 
+    are declining offerings in engineering subjects dealing with infrastructure, 
+    the environment, and related issues, and greater concentration on high 
+    technology subjects, largely supporting increasingly complex scientific 
+    developments. While the latter is important, it should not be at the expense 
+    of more traditional engineering.
+
+    Rapidly developing economies such as China and India, as well as other 
+    industrial countries in Europe and Asia, continue to encourage and advance 
+    the teaching of engineering. Both China and India, respectively, graduate 
+    six and eight times as many traditional engineers as does the United States. 
+    Other industrial countries at minimum maintain their output, while America 
+    suffers an increasingly serious decline in the number of engineering graduates 
+    and a lack of well-educated engineers.
+"""
+)
+```
+
+```python out
+[{'summary_text': ' America has changed dramatically during recent years . The '
+                  'number of engineering graduates in the U.S. has declined in '
+                  'traditional engineering disciplines such as mechanical, civil '
+                  ', electrical, chemical, and aeronautical engineering . Rapidly '
+                  'developing economies such as China and India, as well as other '
+                  'industrial countries in Europe and Asia, continue to encourage '
+                  'and advance engineering .'}]
+```
+
+Like with text generation, you can specify a `max_length` or a `min_length` for the result.
+
+
+## Translation
+
+For translation, you can use a default model if you provide a language pair in the task name (such as `"translation_en_to_fr"`), but the easiest way is to pick the model you want to use on the [Model Hub](https://huggingface.co/models). Here we'll try translating from French to English:
+
+```python
+from transformers import pipeline
+
+translator = pipeline("translation", model="Helsinki-NLP/opus-mt-fr-en")
+translator("Ce cours est produit par Hugging Face.")
+```
+
+```python out
+[{'translation_text': 'This course is produced by Hugging Face.'}]
+```
+
+Like with text generation and summarization, you can specify a `max_length` or a `min_length` for the result.
+
+<Tip>
+
+✏️ **Try it out!** Search for translation models in other languages and try to translate the previous sentence into a few different languages.
+
+</Tip>
+
+The pipelines shown so far are mostly for demonstrative purposes. They were programmed for specific tasks and cannot perform variations of them. In the next chapter, you'll learn what's inside a `pipeline()` function and how to customize its behavior.

From 97648eb332a7d69f2705b19d838af385d8a60812 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 28 Apr 2022 15:33:16 +0100
Subject: [PATCH 072/127] Translates title and first paragraph

---
 chapters/it/chapter1/3.mdx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
index ac22e7e8f..30219e445 100644
--- a/chapters/it/chapter1/3.mdx
+++ b/chapters/it/chapter1/3.mdx
@@ -1,4 +1,4 @@
-# Transformers, what can they do?
+# Cosa sanno fare i Transformer?
 
 <DocNotebookDropdown
   classNames="absolute z-10 right-0 top-0"
@@ -7,7 +7,7 @@
     {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter1/section3.ipynb"},
 ]} />
 
-In this section, we will look at what Transformer models can do and use our first tool from the 🤗 Transformers library: the `pipeline()` function.
+In questa sezione, vedremo di cosa sono capaci i modelli Transformer and useremo il nostro primo strumento della libreria 🤗 Transformers: la funzione `pipeline()`.
 
 <Tip>
 👀 See that <em>Open in Colab</em> button on the top right? Click on it to open a Google Colab notebook with all the code samples of this section. This button will be present in any section containing code examples. 
@@ -15,7 +15,7 @@ In this section, we will look at what Transformer models can do and use our firs
 If you want to run the examples locally, we recommend taking a look at the <a href="/course/chapter0">setup</a>.
 </Tip>
 
-## Transformers are everywhere!
+## I Transformer sono ovunque!
 
 Transformer models are used to solve all kinds of NLP tasks, like the ones mentioned in the previous section. Here are some of the companies and organizations using Hugging Face and Transformer models, who also contribute back to the community by sharing their models:
 

From 63b2e10400166987f4f257e6d54483ebde39f627 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 28 Apr 2022 16:00:37 +0100
Subject: [PATCH 073/127] Adds translation of last section

---
 chapters/it/chapter1/3.mdx | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
index 30219e445..69e9f3284 100644
--- a/chapters/it/chapter1/3.mdx
+++ b/chapters/it/chapter1/3.mdx
@@ -7,21 +7,21 @@
     {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter1/section3.ipynb"},
 ]} />
 
-In questa sezione, vedremo di cosa sono capaci i modelli Transformer and useremo il nostro primo strumento della libreria 🤗 Transformers: la funzione `pipeline()`.
+In questa sezione, vedremo di cosa sono capaci i modelli Transformer e useremo il nostro primo strumento della libreria 🤗 Transformer: la funzione `pipeline()`.
 
 <Tip>
-👀 See that <em>Open in Colab</em> button on the top right? Click on it to open a Google Colab notebook with all the code samples of this section. This button will be present in any section containing code examples. 
+👀 Lo vedi il pulsante <em>Open in Colab</em> in alto a destra? Cliccalo per aprire il blocco note Colab di Google che contiene tutti gli esempi di codice di questa sezione. Ritroverai il pulsante in ogni sezione che contiene esempi di codice. 
 
-If you want to run the examples locally, we recommend taking a look at the <a href="/course/chapter0">setup</a>.
+Se intendi compilare gli esempi localmente, ti consigliamo di dare un'occhiata alla sezione <a href="/course/chapter0">setup</a>.
 </Tip>
 
 ## I Transformer sono ovunque!
 
-Transformer models are used to solve all kinds of NLP tasks, like the ones mentioned in the previous section. Here are some of the companies and organizations using Hugging Face and Transformer models, who also contribute back to the community by sharing their models:
+I modelli Transformer sono utilizzati per eseguire qualsiasi compito di NLP, come ad esempio quelli menzionati nelle sezioni precedenti. Ecco alcune delle aziende e organizzazioni che utilizzano Hugging Face e i modelli Transformer, e contribuiscono alla comunità condividendo i loro propri modelli:
 
 <img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/companies.PNG" alt="Companies using Hugging Face" width="100%">
 
-The [🤗 Transformers library](https://github.com/huggingface/transformers) provides the functionality to create and use those shared models. The [Model Hub](https://huggingface.co/models) contains thousands of pretrained models that anyone can download and use. You can also upload your own models to the Hub!
+La [libreria 🤗 Transformer](https://github.com/huggingface/transformers) provides the functionality to create and use those shared models. The [Model Hub](https://huggingface.co/models) contains thousands of pretrained models that anyone can download and use. You can also upload your own models to the Hub!
 
 <Tip>
 ⚠️ The Hugging Face Hub is not limited to Transformer models. Anyone can share any kind of models or datasets they want! <a href="https://huggingface.co/join">Create a huggingface.co</a> account to benefit from all available features!
@@ -29,7 +29,7 @@ The [🤗 Transformers library](https://github.com/huggingface/transformers) pro
 
 Before diving into how Transformer models work under the hood, let's look at a few examples of how they can be used to solve some interesting NLP problems.
 
-## Working with pipelines
+## Lavorare con le pipeline
 
 <Youtube id="tiZFewofSLM" />
 
@@ -303,9 +303,9 @@ summarizer(
 Like with text generation, you can specify a `max_length` or a `min_length` for the result.
 
 
-## Translation
+## Traduzione
 
-For translation, you can use a default model if you provide a language pair in the task name (such as `"translation_en_to_fr"`), but the easiest way is to pick the model you want to use on the [Model Hub](https://huggingface.co/models). Here we'll try translating from French to English:
+Per compiti di traduzione, puoi utilizzare un modello di default indicando la coppia linguistica nel nome del compito (come ad esempio `"translation_en_to_fr"`), anche se il metodo più semplice è scegliere il modello che desideri utilizzare dal [Model Hub](https://huggingface.co/models). Qui di seguito traduciamo dal francese all'inglese:
 
 ```python
 from transformers import pipeline
@@ -318,12 +318,12 @@ translator("Ce cours est produit par Hugging Face.")
 [{'translation_text': 'This course is produced by Hugging Face.'}]
 ```
 
-Like with text generation and summarization, you can specify a `max_length` or a `min_length` for the result.
+Come per le funzioni di generazione testuale e riassunto, è possibile specificare un `max_length` o un `min_length` per il risultato.
 
 <Tip>
 
-✏️ **Try it out!** Search for translation models in other languages and try to translate the previous sentence into a few different languages.
+✏️ **Provaci anche tu!** Cerca modelli di traduzione in altre lingue e prova a tradurre la frase precedente in un paio di lingue diverse.
 
 </Tip>
 
-The pipelines shown so far are mostly for demonstrative purposes. They were programmed for specific tasks and cannot perform variations of them. In the next chapter, you'll learn what's inside a `pipeline()` function and how to customize its behavior.
+Finora abbiamo mostrato pipeline a solo scopo dimostrativo. Tali pipeline sono state programmate per compiti ben specifici e non sono in grado di eseguire variazioni di questi ultimi. Nel prossimo capitolo, imparerai cosa si nasconde dentro la funzione `pipeline()` e come personalizzarne il comportamento.

From 1bc3f6de65f4cd24ca8b952120de62037204aaef Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 28 Apr 2022 16:09:48 +0100
Subject: [PATCH 074/127] Finishes translation of second paragraph

---
 chapters/it/chapter1/3.mdx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
index 69e9f3284..c5ac54823 100644
--- a/chapters/it/chapter1/3.mdx
+++ b/chapters/it/chapter1/3.mdx
@@ -17,17 +17,17 @@ Se intendi compilare gli esempi localmente, ti consigliamo di dare un'occhiata a
 
 ## I Transformer sono ovunque!
 
-I modelli Transformer sono utilizzati per eseguire qualsiasi compito di NLP, come ad esempio quelli menzionati nelle sezioni precedenti. Ecco alcune delle aziende e organizzazioni che utilizzano Hugging Face e i modelli Transformer, e contribuiscono alla comunità condividendo i loro propri modelli:
+I modelli Transformer sono utilizzati per eseguire qualsiasi compito di NLP, come ad esempio quelli menzionati nelle sezioni precedenti. Ecco alcune delle aziende e organizzazioni che utilizzano Hugging Face e i modelli Transformer, e contribuiscono a loro volta alla comunità condividendo i loro propri modelli:
 
 <img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/companies.PNG" alt="Companies using Hugging Face" width="100%">
 
-La [libreria 🤗 Transformer](https://github.com/huggingface/transformers) provides the functionality to create and use those shared models. The [Model Hub](https://huggingface.co/models) contains thousands of pretrained models that anyone can download and use. You can also upload your own models to the Hub!
+La [libreria 🤗 Transformer](https://github.com/huggingface/transformers) fornisce la funzionalità per creare e utilizzare questi modelli condivisi. Il [Model Hub](https://huggingface.co/models) contiene migliaia di modelli pre-addestrati che possono essere scaricati e usati liberamente. Puoi anche caricare i tuoi propri modelli nell'Hub!
 
 <Tip>
-⚠️ The Hugging Face Hub is not limited to Transformer models. Anyone can share any kind of models or datasets they want! <a href="https://huggingface.co/join">Create a huggingface.co</a> account to benefit from all available features!
+⚠️ Hugging Face Hub non è limitato ai soli modelli Transformer. Chiunque può condividere qualsiasi tipo di modello o dataset (*insieme di dati*)! <a href="https://huggingface.co/join">Crea un profilo huggingface.co</a> per approfittare di tutte le feature (*funzioni*) disponibili!
 </Tip>
 
-Before diving into how Transformer models work under the hood, let's look at a few examples of how they can be used to solve some interesting NLP problems.
+Prima di scoprire come i modelli Transformer funzionino dietro le quinte, diamo un occhio a qualche esempio di come questi possano essere utilizzati per risolvere alcuni problemi interessanti di NLP.
 
 ## Lavorare con le pipeline
 

From a43e53c9c209d77e89b7afaebcd148576a405064 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 28 Apr 2022 16:21:10 +0100
Subject: [PATCH 075/127] Updates translation

---
 chapters/it/chapter1/3.mdx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
index c5ac54823..d230a815c 100644
--- a/chapters/it/chapter1/3.mdx
+++ b/chapters/it/chapter1/3.mdx
@@ -24,7 +24,7 @@ I modelli Transformer sono utilizzati per eseguire qualsiasi compito di NLP, com
 La [libreria 🤗 Transformer](https://github.com/huggingface/transformers) fornisce la funzionalità per creare e utilizzare questi modelli condivisi. Il [Model Hub](https://huggingface.co/models) contiene migliaia di modelli pre-addestrati che possono essere scaricati e usati liberamente. Puoi anche caricare i tuoi propri modelli nell'Hub!
 
 <Tip>
-⚠️ Hugging Face Hub non è limitato ai soli modelli Transformer. Chiunque può condividere qualsiasi tipo di modello o dataset (*insieme di dati*)! <a href="https://huggingface.co/join">Crea un profilo huggingface.co</a> per approfittare di tutte le feature (*funzioni*) disponibili!
+⚠️ Hugging Face Hub non è limitato ai soli modelli Transformer. Chiunque può condividere qualsiasi tipo di modello o dataset (<em>insieme di dati</em>)! <a href="https://huggingface.co/join">Crea un profilo huggingface.co</a> per approfittare di tutte le feature (<em>funzioni</em>) disponibili!
 </Tip>
 
 Prima di scoprire come i modelli Transformer funzionino dietro le quinte, diamo un occhio a qualche esempio di come questi possano essere utilizzati per risolvere alcuni problemi interessanti di NLP.
@@ -33,7 +33,7 @@ Prima di scoprire come i modelli Transformer funzionino dietro le quinte, diamo
 
 <Youtube id="tiZFewofSLM" />
 
-The most basic object in the 🤗 Transformers library is the `pipeline()` function. It connects a model with its necessary preprocessing and postprocessing steps, allowing us to directly input any text and get an intelligible answer:
+L'oggetto più basilare della libreria 🤗 Transformer è la funzione `pipeline()`. Questa connette un modello con tutte le fasi necessarie di preprocessing e postprocessing, permettendoci così di fornire direttamente un qualsiasi testo come input e ottenere una risposta intelligibile:
 
 ```python
 from transformers import pipeline
@@ -46,7 +46,7 @@ classifier("I've been waiting for a HuggingFace course my whole life.")
 [{'label': 'POSITIVE', 'score': 0.9598047137260437}]
 ```
 
-We can even pass several sentences!
+È anche possibile lavorare su più frasi!
 
 ```python
 classifier(

From 8e1fa4b17793094ebc3f72e24b309ca78090eeba Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 28 Apr 2022 16:22:21 +0100
Subject: [PATCH 076/127] Updates title

---
 chapters/it/chapter1/3.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
index d230a815c..ffd37da79 100644
--- a/chapters/it/chapter1/3.mdx
+++ b/chapters/it/chapter1/3.mdx
@@ -1,4 +1,4 @@
-# Cosa sanno fare i Transformer?
+# Cosa fanno i Transformer?
 
 <DocNotebookDropdown
   classNames="absolute z-10 right-0 top-0"

From 98ec8241ad76492f349c7d454eab0fd1accdd5c4 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 28 Apr 2022 16:22:49 +0100
Subject: [PATCH 077/127] Updates title for chapter1/3

---
 chapters/it/_toctree.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/_toctree.yml b/chapters/it/_toctree.yml
index 9870e59a6..d9c034499 100644
--- a/chapters/it/_toctree.yml
+++ b/chapters/it/_toctree.yml
@@ -10,4 +10,4 @@
   - local: chapter1/2
     title: Natural Language Processing
   - local: chapter1/3
-    title: Cosa sanno fare i Transformer?
+    title: Cosa fanno i Transformer?

From 616bd62eead0d3712a4f7843caa8ec5ddcf4efa6 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 28 Apr 2022 20:34:59 +0100
Subject: [PATCH 078/127] Adds new translated paragraphs

---
 chapters/it/chapter1/3.mdx | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
index ffd37da79..6916f99fb 100644
--- a/chapters/it/chapter1/3.mdx
+++ b/chapters/it/chapter1/3.mdx
@@ -59,20 +59,20 @@ classifier(
  {'label': 'NEGATIVE', 'score': 0.9994558095932007}]
 ```
 
-By default, this pipeline selects a particular pretrained model that has been fine-tuned for sentiment analysis in English. The model is downloaded and cached when you create the `classifier` object. If you rerun the command, the cached model will be used instead and there is no need to download the model again.
+Per default, questo pipeline seleziona un preciso modello pre-addestrato che è stato affinato per il sentiment analysis in inglese. Quando creiamo l'oggetto `classifier`, il modello viene scaricato e memorizzato nella cache. Se inizializziamo di nuovo il comando, verrà utilizzato il modello salvato nella cache e non ci sarà quindi bisogno di scaricare di nuovo il modello.
 
-There are three main steps involved when you pass some text to a pipeline:
+Tre passaggi principali vengono coinvolti quando passiamo del testo in un pipeline:
 
-1. The text is preprocessed into a format the model can understand.
-2. The preprocessed inputs are passed to the model.
-3. The predictions of the model are post-processed, so you can make sense of them.
+1. Il testo è pre-elaborato in un formato che il modello può capire.
+2. Gli input pre-elaborati vengono passati al modello.
+3. Le previsioni del modello sono post-elaborate in un formato accessibile all'utilizzatore.
 
 
-Some of the currently [available pipelines](https://huggingface.co/transformers/main_classes/pipelines.html) are:
+Tra le [pipeline disponibili](https://huggingface.co/transformers/main_classes/pipelines.html) al momento ci sono:
 
-- `feature-extraction` (get the vector representation of a text)
+- `feature-extraction` (per ottenere la rappresentazione vettoriale di un testo)
 - `fill-mask`
-- `ner` (named entity recognition)
+- `ner` (riconoscimento delle entità nominate, *named entity recognition*)
 - `question-answering`
 - `sentiment-analysis`
 - `summarization`
@@ -80,7 +80,7 @@ Some of the currently [available pipelines](https://huggingface.co/transformers/
 - `translation`
 - `zero-shot-classification`
 
-Let's have a look at a few of these!
+Proviamo a vederne alcune!
 
 ## Zero-shot classification
 

From d279f9006c4eed72430ab36b9165df8cb936f883 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Fri, 29 Apr 2022 20:52:38 +0100
Subject: [PATCH 079/127] Updates translation

---
 chapters/it/chapter1/3.mdx | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
index 6916f99fb..e31368842 100644
--- a/chapters/it/chapter1/3.mdx
+++ b/chapters/it/chapter1/3.mdx
@@ -12,7 +12,7 @@ In questa sezione, vedremo di cosa sono capaci i modelli Transformer and useremo
 <Tip>
 👀 Lo vedi il pulsante <em>Open in Colab</em> in alto a destra? Cliccalo per aprire il blocco note Colab di Google che contiene tutti gli esempi di codice di questa sezione. Ritroverai il pulsante in ogni sezione che contiene esempi di codice. 
 
-Se intendi compilare gli esempi localmente, ti consigliamo di dare un'occhiata alla sezione <a href="/course/chapter0">setup</a>.
+Se intendi compilare gli esempi localmente, ti consigliamo di dare un'occhio alla sezione <a href="/course/chapter0">setup</a>.
 </Tip>
 
 ## I Transformer sono ovunque!
@@ -59,7 +59,7 @@ classifier(
  {'label': 'NEGATIVE', 'score': 0.9994558095932007}]
 ```
 
-Per default, questo pipeline seleziona un preciso modello pre-addestrato che è stato affinato per il sentiment analysis in inglese. Quando creiamo l'oggetto `classifier`, il modello viene scaricato e memorizzato nella cache. Se inizializziamo di nuovo il comando, verrà utilizzato il modello salvato nella cache e non ci sarà quindi bisogno di scaricare di nuovo il modello.
+Per default, questa pipeline seleziona un preciso modello pre-addestrato che è stato affinato per il sentiment analysis in inglese. Quando creiamo l'oggetto `classifier`, il modello viene scaricato e memorizzato nella cache. Se inizializziamo di nuovo il comando, verrà utilizzato il modello salvato nella cache e non ci sarà quindi bisogno di scaricare di nuovo il modello.
 
 Tre passaggi principali vengono coinvolti quando passiamo del testo in un pipeline:
 
@@ -111,9 +111,9 @@ This pipeline is called _zero-shot_ because you don't need to fine-tune the mode
 </Tip>
 
 
-## Text generation
+## Generazione di testi
 
-Now let's see how to use a pipeline to generate some text. The main idea here is that you provide a prompt and the model will auto-complete it by generating the remaining text. This is similar to the predictive text feature that is found on many phones. Text generation involves randomness, so it's normal if you don't get the same results as shown below.
+Vediamo ora come utilizzare la pipeline per generare testi. L'idea è di fornire un prompt (*richiesta*) che verrà auto-completato dal modello, il quale genererà il testo mancante. Si tratta di un compito simile alla funzione di scrittura facilitata che troviamo oggi in molti cellulari. La generazione di testi presenta una componente arbitraria, per cui non essere sorpreso/a se non ottieni gli stessi risultati che mostriamo qui sotto.
 
 ```python
 from transformers import pipeline
@@ -130,11 +130,11 @@ generator("In this course, we will teach you how to")
                     'HTTP'}]
 ```
 
-You can control how many different sequences are generated with the argument `num_return_sequences` and the total length of the output text with the argument `max_length`.
+Usando l'argomento `num_return_sequences` puoi controllare quante sequenze diverse vengono generate e, con l'argomento `max_length`, la lunghezza totale dell'output testuale.
 
 <Tip>
 
-✏️ **Try it out!** Use the `num_return_sequences` and `max_length` arguments to generate two sentences of 15 words each.
+✏️ **Provaci anche tu!** Usa gli argomenti `num_return_sequences` e `max_length` per generare due frasi di 15 parole ciascuna.
 
 </Tip>
 

From f5b7ce5107fe61ca25cb93e45c4f0d4d13f78191 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 2 May 2022 20:49:07 +0100
Subject: [PATCH 080/127] Adds 2 translated sections

---
 chapters/it/chapter1/3.mdx | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
index e31368842..60968881b 100644
--- a/chapters/it/chapter1/3.mdx
+++ b/chapters/it/chapter1/3.mdx
@@ -238,9 +238,9 @@ We pass the option `grouped_entities=True` in the pipeline creation function to
 
 </Tip>
 
-## Question answering
+## Risposta a domande
 
-The `question-answering` pipeline answers questions using information from a given context:
+La pipeline `question-answering` risponde a domande utilizzando informazioni da un contesto prestabilito:
 
 ```python
 from transformers import pipeline
@@ -256,11 +256,11 @@ question_answerer(
 {'score': 0.6385916471481323, 'start': 33, 'end': 45, 'answer': 'Hugging Face'}
 ```
 
-Note that this pipeline works by extracting information from the provided context; it does not generate the answer.
+Nota che questa pipeline non genera risposte ma estrae informazioni da un contesto prestabilito.
 
-## Summarization
+## Riassunto
 
-Summarization is the task of reducing a text into a shorter text while keeping all (or most) of the important aspects referenced in the text. Here's an example:
+Quello del riassunto è un compito che trasforma un testo in un testo più breve, conservando tutti (o quasi) gli argomenti più importanti del testo di partenza. Ecco un esempio:
 
 ```python
 from transformers import pipeline
@@ -300,7 +300,7 @@ summarizer(
                   'and advance engineering .'}]
 ```
 
-Like with text generation, you can specify a `max_length` or a `min_length` for the result.
+Come nella generazione di testi, puoi specificare un `max_length` o `min_length` per il testo target.
 
 
 ## Traduzione

From fda11887ac769ff1a1e1a788c4349ed22ddc380b Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 2 May 2022 20:50:18 +0100
Subject: [PATCH 081/127] Fixes typo

---
 chapters/it/chapter1/2.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/chapter1/2.mdx b/chapters/it/chapter1/2.mdx
index 8e9191d8a..a5845ca54 100644
--- a/chapters/it/chapter1/2.mdx
+++ b/chapters/it/chapter1/2.mdx
@@ -16,6 +16,6 @@ La seguente è una lista dei più comuni compiti di NLP, ognuno accompagnato da
 
 NLP non si limita però ai soli testi scritti, e tratta anche sfide complesse in riconoscimento vocale e computer vision (*elaborazione di dati visuali*), quali la generazione di trascrizioni di campioni audio o la descrizione di immagini.
 
-## Perché constituisce una sfida?
+## Perché costituisce una sfida?
 
 I computer non elaborano le informazioni allo stesso modo degli umani. Ad esempio, quando leggiamo la frase "Ho fame," ne capiamo senza difficoltà il senso. Allo stesso modo, date due frasi quali "Ho fame" e "Sono triste," riusciamo facilmente a determinarne il livello di similarità. Per i modelli di machine learning (ML), tali compiti sono più difficili. Il testo deve essere elaborato in un modo che permetta al modello di imparare da esso. E siccome il linguaggio è complesso, il modo in cui l'elaborazione va svolta dev'essere studiato con cura. Molta ricerca è stata fatta su come rappresentare i testi, e nel prossimo capitolo vedremo alcuni di questi metodi.

From bd910f0bc714a7b8826b2fe266d08a1436fd98ba Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 2 May 2022 21:13:32 +0100
Subject: [PATCH 082/127] Updates translation

---
 chapters/it/chapter1/3.mdx | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
index 60968881b..5a8cbd8f7 100644
--- a/chapters/it/chapter1/3.mdx
+++ b/chapters/it/chapter1/3.mdx
@@ -84,7 +84,7 @@ Proviamo a vederne alcune!
 
 ## Zero-shot classification
 
-We'll start by tackling a more challenging task where we need to classify texts that haven't been labelled. This is a common scenario in real-world projects because annotating text is usually time-consuming and requires domain expertise. For this use case, the `zero-shot-classification` pipeline is very powerful: it allows you to specify which labels to use for the classification, so you don't have to rely on the labels of the pretrained model. You've already seen how the model can classify a sentence as positive or negative using those two labels — but it can also classify the text using any other set of labels you like.
+Cominceremo con l'affrontare un compito impegnativo che consiste nella classificazione di testi non etichettati. Si tratta di uno scenario comune in molti progetti pratici perché l'annotazione testuale richiede tempo e competenza settoriale. In questo caso d'uso, la pipeline `zero-shot-classification` è molto potente e permette di specificare le etichette da utilizzare per la classificazione, in modo da non dover fare affidamento sulle etichette del modello pre-addestrato. Abbiamo già visto come il modello riesca a classificare una frase utilizzando le etichette 'positiva' e 'negativa', ma è anche possibile classificare testi utilizzando una qualsiasi serie di etichette di tua scelta.
 
 ```python
 from transformers import pipeline
@@ -102,11 +102,11 @@ classifier(
  'scores': [0.8445963859558105, 0.111976258456707, 0.043427448719739914]}
 ```
 
-This pipeline is called _zero-shot_ because you don't need to fine-tune the model on your data to use it. It can directly return probability scores for any list of labels you want!
+Questa pipeline si chiama _zero-shot_ perché non hai bisogno di affinare il modello usando i tuoi dati per poterlo utilizzare. È direttamente in grado di generare una previsione probabilistica per qualsiasi lista di etichette tu voglia!
 
 <Tip>
 
-✏️ **Try it out!** Play around with your own sequences and labels and see how the model behaves.
+✏️ **Provaci anche tu!** Divertiti creando sequenze ed etichette e osserva come si comporta il modello.
 
 </Tip>
 
@@ -170,7 +170,7 @@ Once you select a model by clicking on it, you'll see that there is a widget ena
 
 <Tip>
 
-✏️ **Try it out!** Use the filters to find a text generation model for another language. Feel free to play with the widget and use it in a pipeline!
+✏️ **Provaci anche tu!** Use the filters to find a text generation model for another language. Feel free to play with the widget and use it in a pipeline!
 
 </Tip>
 
@@ -206,13 +206,13 @@ The `top_k` argument controls how many possibilities you want to be displayed. N
 
 <Tip>
 
-✏️ **Try it out!** Search for the `bert-base-cased` model on the Hub and identify its mask word in the Inference API widget. What does this model predict for the sentence in our `pipeline` example above?
+✏️ **Provaci anche tu!** Search for the `bert-base-cased` model on the Hub and identify its mask word in the Inference API widget. What does this model predict for the sentence in our `pipeline` example above?
 
 </Tip>
 
-## Named entity recognition
+## Riconoscimento delle entità nominate
 
-Named entity recognition (NER) is a task where the model has to find which parts of the input text correspond to entities such as persons, locations, or organizations. Let's look at an example:
+Il riconoscimento di entità nominate (*Named entity recognition*, NER) is a task where the model has to find which parts of the input text correspond to entities such as persons, locations, or organizations. Let's look at an example:
 
 ```python
 from transformers import pipeline
@@ -234,7 +234,7 @@ We pass the option `grouped_entities=True` in the pipeline creation function to
 
 <Tip>
 
-✏️ **Try it out!** Search the Model Hub for a model able to do part-of-speech tagging (usually abbreviated as POS) in English. What does this model predict for the sentence in the example above?
+✏️ **Provaci anche tu!** Nel Model Hub, cerca un dodello capace di effettuare part-of-speech tagging (comunemente abbreviato come POS) in inglese. Cosa predice il modello per la frase nell'esempio qui sopra?
 
 </Tip>
 

From ccb36d0570b59b23261f7b951988c46cde7e5fc7 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 2 May 2022 21:24:29 +0100
Subject: [PATCH 083/127] Translates NER

---
 chapters/it/chapter1/3.mdx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
index 5a8cbd8f7..09b07b1d8 100644
--- a/chapters/it/chapter1/3.mdx
+++ b/chapters/it/chapter1/3.mdx
@@ -212,7 +212,7 @@ The `top_k` argument controls how many possibilities you want to be displayed. N
 
 ## Riconoscimento delle entità nominate
 
-Il riconoscimento di entità nominate (*Named entity recognition*, NER) is a task where the model has to find which parts of the input text correspond to entities such as persons, locations, or organizations. Let's look at an example:
+Il riconoscimento di entità nominate (*Named entity recognition*, NER) è un compito in cui il modello deve determinare quali parti dell'input testuale corrispondono a entità quali persone, località, o organizzazioni. Vediamo un esempio:
 
 ```python
 from transformers import pipeline
@@ -228,13 +228,13 @@ ner("My name is Sylvain and I work at Hugging Face in Brooklyn.")
 ]
 ```
 
-Here the model correctly identified that Sylvain is a person (PER), Hugging Face an organization (ORG), and Brooklyn a location (LOC).
+Qui il modello ha correttamente identificato che Sylvain è una persona (PER), Hugging Face un'organizzazione (ORG), e Brooklyn una località (LOC).
 
-We pass the option `grouped_entities=True` in the pipeline creation function to tell the pipeline to regroup together the parts of the sentence that correspond to the same entity: here the model correctly grouped "Hugging" and "Face" as a single organization, even though the name consists of multiple words. In fact, as we will see in the next chapter, the preprocessing even splits some words into smaller parts. For instance, `Sylvain` is split into four pieces: `S`, `##yl`, `##va`, and `##in`. In the post-processing step, the pipeline successfully regrouped those pieces.
+Passiamo l'opzione `grouped_entities=True` nella funzione di creazione della pipeline per raggruppare le parti frasali che corrispondono alla stessa entità: qui il modello raggruppa correttamente "Hugging" e "Face" come singola organizzazione, nonostante il nome sia formato da più parole. A dire il vero, come vedremo nel prossimo capitolo, il preprocessing divide perfino alcune parole in parti più piccole. Ad esempio, `Sylvain` viene suddiviso in quattro parti: `S`, `##yl`, `##va`, e `##in`. Al momento del post-processing, la pipeline ha raggruppato le parti con successo.
 
 <Tip>
 
-✏️ **Provaci anche tu!** Nel Model Hub, cerca un dodello capace di effettuare part-of-speech tagging (comunemente abbreviato come POS) in inglese. Cosa predice il modello per la frase nell'esempio qui sopra?
+✏️ **Provaci anche tu!** Nel Model Hub, cerca un modello capace di effettuare part-of-speech tagging (comunemente abbreviato come POS) in inglese. Cosa predice il modello per la frase nell'esempio qui sopra?
 
 </Tip>
 

From b20eefa1c7327cff44dc116a17f8731b3eda61ad Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 2 May 2022 21:30:26 +0100
Subject: [PATCH 084/127] Update 3.mdx

---
 chapters/it/chapter1/3.mdx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
index 09b07b1d8..22a20056d 100644
--- a/chapters/it/chapter1/3.mdx
+++ b/chapters/it/chapter1/3.mdx
@@ -113,7 +113,7 @@ Questa pipeline si chiama _zero-shot_ perché non hai bisogno di affinare il mod
 
 ## Generazione di testi
 
-Vediamo ora come utilizzare la pipeline per generare testi. L'idea è di fornire un prompt (*richiesta*) che verrà auto-completato dal modello, il quale genererà il testo mancante. Si tratta di un compito simile alla funzione di scrittura facilitata che troviamo oggi in molti cellulari. La generazione di testi presenta una componente arbitraria, per cui non essere sorpreso/a se non ottieni gli stessi risultati che mostriamo qui sotto.
+Vediamo ora come utilizzare la pipeline per generare testi. L'idea è di fornire un prompt (*richiesta*) che verrà auto-completato dal modello, il quale genererà il testo mancante. Si tratta di un compito simile alla funzione di scrittura facilitata che troviamo al giorno d'oggi in molti cellulari. La generazione di testi presenta una componente arbitraria, per cui non essere sorpreso/a se non ottieni gli stessi risultati che mostriamo qui sotto.
 
 ```python
 from transformers import pipeline
@@ -139,7 +139,7 @@ Usando l'argomento `num_return_sequences` puoi controllare quante sequenze diver
 </Tip>
 
 
-## Using any model from the Hub in a pipeline
+## Utilizzare un qualsiasi modello dell'Hub in una pipeline
 
 The previous examples used the default model for the task at hand, but you can also choose a particular model from the Hub to use in a pipeline for a specific task — say, text generation. Go to the [Model Hub](https://huggingface.co/models) and click on the corresponding tag on the left to display only the supported models for that task. You should get to a page like [this one](https://huggingface.co/models?pipeline_tag=text-generation).
 
@@ -170,7 +170,7 @@ Once you select a model by clicking on it, you'll see that there is a widget ena
 
 <Tip>
 
-✏️ **Provaci anche tu!** Use the filters to find a text generation model for another language. Feel free to play with the widget and use it in a pipeline!
+✏️ **Provaci anche tu!** Usa i filtri per trovare un modello di generazione testuale per un'altra lingua. Sentiti libero/a di divertirti con il widget e usalo in una pipeline!
 
 </Tip>
 
@@ -300,7 +300,7 @@ summarizer(
                   'and advance engineering .'}]
 ```
 
-Come nella generazione di testi, puoi specificare un `max_length` o `min_length` per il testo target.
+Come nella generazione di testi, puoi specificare un `max_length` o `min_length` per il testo da generare.
 
 
 ## Traduzione

From aee44d4a3e1a4ea3bdccb4dec204d1b544a775cd Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 2 May 2022 21:45:20 +0100
Subject: [PATCH 085/127] Update 3.mdx

---
 chapters/it/chapter1/3.mdx | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
index 22a20056d..f07401beb 100644
--- a/chapters/it/chapter1/3.mdx
+++ b/chapters/it/chapter1/3.mdx
@@ -141,9 +141,9 @@ Usando l'argomento `num_return_sequences` puoi controllare quante sequenze diver
 
 ## Utilizzare un qualsiasi modello dell'Hub in una pipeline
 
-The previous examples used the default model for the task at hand, but you can also choose a particular model from the Hub to use in a pipeline for a specific task — say, text generation. Go to the [Model Hub](https://huggingface.co/models) and click on the corresponding tag on the left to display only the supported models for that task. You should get to a page like [this one](https://huggingface.co/models?pipeline_tag=text-generation).
+Gli esempi precedenti utilizzavano il modello di default per il compito dato, ma puoi anche scegliere un modello particolare dell'Hub da utilizzare in una pipeline per un compito specifico come ad esempio la generazione testuale. Vai al [Model Hub](https://huggingface.co/models) e clicca sull'etichetta corrispondente a destra per mostrare solo i modelli supportati per il compito in questione. Dovresti trovarti in una pagina come [questa](https://huggingface.co/models?pipeline_tag=text-generation).
 
-Let's try the [`distilgpt2`](https://huggingface.co/distilgpt2) model! Here's how to load it in the same pipeline as before:
+Proviamo il modello [`distilgpt2`](https://huggingface.co/distilgpt2)! Ecco come caricarlo nella pipeline usata in precedenza:
 
 ```python
 from transformers import pipeline
@@ -164,9 +164,9 @@ generator(
                     'time and real'}]
 ```
 
-You can refine your search for a model by clicking on the language tags, and pick a model that will generate text in another language. The Model Hub even contains checkpoints for multilingual models that support several languages.
+Puoi affinare la ricerca di un modello cliccando sulle etichette corrispondenti alle lingue, e scegliere in seguito un modello che generi testo in un'altra lingua. Il Model Hub contiene anche checkpoint per modelli multilingue che supportano diverse lingue.
 
-Once you select a model by clicking on it, you'll see that there is a widget enabling you to try it directly online. This way you can quickly test the model's capabilities before downloading it.
+Quando avrai selezionato un modello cliccando su di esso, vedrai che esiste un widget che ti permette di provarlo direttamente online. In questo modo, puoi testare velocemente le capacità del modello prima di scaricarlo.
 
 <Tip>
 
@@ -174,11 +174,11 @@ Once you select a model by clicking on it, you'll see that there is a widget ena
 
 </Tip>
 
-### The Inference API
+### La Inference API
 
-All the models can be tested directly through your browser using the Inference API, which is available on the Hugging Face [website](https://huggingface.co/). You can play with the model directly on this page by inputting custom text and watching the model process the input data.
+Tutti i modelli possono essere testati direttamente attraverso il tuo browser utilizzando l'Inference API, che trovi nel [sito](https://huggingface.co/) di Hugging Face. Puoi divertirti con il modello direttamente in questa pagina, inserendo testo personalizzato e osservando poi come il modello processi i dati fornitigli.
 
-The Inference API that powers the widget is also available as a paid product, which comes in handy if you need it for your workflows. See the [pricing page](https://huggingface.co/pricing) for more details.
+La Inference API che alimenta il widget è disponibile anche come prodotto a pagamento, il che è comodo se ne hai bisogno per i tuoi flussi di lavoro. Vedi la [pagina dei prezzi](https://huggingface.co/pricing) per maggiori informazioni.
 
 ## Mask filling
 

From 2e97ca60b6b58c4b9ebf5ddcf98976c15bd9e884 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 2 May 2022 21:53:06 +0100
Subject: [PATCH 086/127] Final version

---
 chapters/it/chapter1/3.mdx | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
index f07401beb..8a70cab36 100644
--- a/chapters/it/chapter1/3.mdx
+++ b/chapters/it/chapter1/3.mdx
@@ -82,7 +82,7 @@ Tra le [pipeline disponibili](https://huggingface.co/transformers/main_classes/p
 
 Proviamo a vederne alcune!
 
-## Zero-shot classification
+## Classificazione zero-shot
 
 Cominceremo con l'affrontare un compito impegnativo che consiste nella classificazione di testi non etichettati. Si tratta di uno scenario comune in molti progetti pratici perché l'annotazione testuale richiede tempo e competenza settoriale. In questo caso d'uso, la pipeline `zero-shot-classification` è molto potente e permette di specificare le etichette da utilizzare per la classificazione, in modo da non dover fare affidamento sulle etichette del modello pre-addestrato. Abbiamo già visto come il modello riesca a classificare una frase utilizzando le etichette 'positiva' e 'negativa', ma è anche possibile classificare testi utilizzando una qualsiasi serie di etichette di tua scelta.
 
@@ -139,7 +139,7 @@ Usando l'argomento `num_return_sequences` puoi controllare quante sequenze diver
 </Tip>
 
 
-## Utilizzare un qualsiasi modello dell'Hub in una pipeline
+## Utilizzo di un qualsiasi modello dell'Hub in una pipeline
 
 Gli esempi precedenti utilizzavano il modello di default per il compito dato, ma puoi anche scegliere un modello particolare dell'Hub da utilizzare in una pipeline per un compito specifico come ad esempio la generazione testuale. Vai al [Model Hub](https://huggingface.co/models) e clicca sull'etichetta corrispondente a destra per mostrare solo i modelli supportati per il compito in questione. Dovresti trovarti in una pagina come [questa](https://huggingface.co/models?pipeline_tag=text-generation).
 
@@ -182,7 +182,7 @@ La Inference API che alimenta il widget è disponibile anche come prodotto a pag
 
 ## Mask filling
 
-The next pipeline you'll try is `fill-mask`. The idea of this task is to fill in the blanks in a given text:
+La prossima pipeline che proverai è `fill-mask`. L'idea di questo compito è di completare gli spazi bianchi in un dato testo:
 
 ```python
 from transformers import pipeline
@@ -202,11 +202,11 @@ unmasker("This course will teach you all about <mask> models.", top_k=2)
   'token_str': ' computational'}]
 ```
 
-The `top_k` argument controls how many possibilities you want to be displayed. Note that here the model fills in the special `<mask>` word, which is often referred to as a *mask token*. Other mask-filling models might have different mask tokens, so it's always good to verify the proper mask word when exploring other models. One way to check it is by looking at the mask word used in the widget.
+L'argomento `top_k` gestisce il numero di possibilità che vuoi mostrare. Nota che qui il modello inserisce la `<mask>` word speciale, la quale viene spesso chiamata *mask token*. Altri modelli di tipo mask-filling potrebbero avere mask token diversi, quindi è sempre bene verificare quale sia la corretta mask word quando esploriamo nuovi modelli. Un modo per verificarla consiste nel trovare la mask word che viene utilizzata nel widget.
 
 <Tip>
 
-✏️ **Provaci anche tu!** Search for the `bert-base-cased` model on the Hub and identify its mask word in the Inference API widget. What does this model predict for the sentence in our `pipeline` example above?
+✏️ **Provaci anche tu!** Cerca il modello `bert-base-cased` nell'Hub e identifica la sua mask word nel widget dell'Inference API. Cosa predice questo modello per la frase nel nostro esempio `pipeline` qui sopra?
 
 </Tip>
 

From d457532891fd6f31e9a01e1189c5840e91e63ca9 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 2 May 2022 21:53:32 +0100
Subject: [PATCH 087/127] Final version

---
 chapters/it/chapter1/3.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
index 8a70cab36..7b87e2cac 100644
--- a/chapters/it/chapter1/3.mdx
+++ b/chapters/it/chapter1/3.mdx
@@ -7,7 +7,7 @@
     {label: "Aws Studio", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter1/section3.ipynb"},
 ]} />
 
-In questa sezione, vedremo di cosa sono capaci i modelli Transformer and useremo il nostro primo strumento della libreria 🤗 Transformer: la funzione `pipeline()`.
+In questa sezione, vedremo di cosa sono capaci i modelli Transformer e useremo il nostro primo strumento della libreria 🤗 Transformer: la funzione `pipeline()`.
 
 <Tip>
 👀 Lo vedi il pulsante <em>Open in Colab</em> in alto a destra? Cliccalo per aprire il blocco note Colab di Google che contiene tutti gli esempi di codice di questa sezione. Ritroverai il pulsante in ogni sezione che contiene esempi di codice. 

From 2fa4f960ad8fca2a17d256da8f9c8c65ca516a27 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 2 May 2022 21:54:11 +0100
Subject: [PATCH 088/127] Final version

---
 chapters/it/chapter1/3.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
index 7b87e2cac..6b13adbdb 100644
--- a/chapters/it/chapter1/3.mdx
+++ b/chapters/it/chapter1/3.mdx
@@ -12,7 +12,7 @@ In questa sezione, vedremo di cosa sono capaci i modelli Transformer e useremo i
 <Tip>
 👀 Lo vedi il pulsante <em>Open in Colab</em> in alto a destra? Cliccalo per aprire il blocco note Colab di Google che contiene tutti gli esempi di codice di questa sezione. Ritroverai il pulsante in ogni sezione che contiene esempi di codice. 
 
-Se intendi compilare gli esempi localmente, ti consigliamo di dare un'occhio alla sezione <a href="/course/chapter0">setup</a>.
+Se intendi compilare gli esempi localmente, ti consigliamo di dare un occhio alla sezione <a href="/course/chapter0">setup</a>.
 </Tip>
 
 ## I Transformer sono ovunque!

From 6c502e938cafc71addace9f6e3239e0e5af58f9d Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 2 May 2022 22:05:44 +0100
Subject: [PATCH 089/127] Final version

---
 chapters/it/chapter1/3.mdx | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/chapters/it/chapter1/3.mdx b/chapters/it/chapter1/3.mdx
index 6b13adbdb..7fb506a94 100644
--- a/chapters/it/chapter1/3.mdx
+++ b/chapters/it/chapter1/3.mdx
@@ -17,23 +17,23 @@ Se intendi compilare gli esempi localmente, ti consigliamo di dare un occhio all
 
 ## I Transformer sono ovunque!
 
-I modelli Transformer sono utilizzati per eseguire qualsiasi compito di NLP, come ad esempio quelli menzionati nelle sezioni precedenti. Ecco alcune delle aziende e organizzazioni che utilizzano Hugging Face e i modelli Transformer, e contribuiscono a loro volta alla comunità condividendo i loro propri modelli:
+I modelli Transformer sono utilizzati per eseguire qualsiasi compito di NLP, come ad esempio quelli menzionati nelle sezioni precedenti. Ecco alcune delle aziende e organizzazioni che utilizzano Hugging Face e i modelli Transformer, e contribuiscono a loro volta alla comunità condividendo i propri modelli:
 
 <img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/companies.PNG" alt="Companies using Hugging Face" width="100%">
 
-La [libreria 🤗 Transformer](https://github.com/huggingface/transformers) fornisce la funzionalità per creare e utilizzare questi modelli condivisi. Il [Model Hub](https://huggingface.co/models) contiene migliaia di modelli pre-addestrati che possono essere scaricati e usati liberamente. Puoi anche caricare i tuoi propri modelli nell'Hub!
+La [libreria 🤗 Transformer](https://github.com/huggingface/transformers) fornisce la funzionalità per creare e utilizzare questi modelli condivisi. Il [Model Hub](https://huggingface.co/models) contiene migliaia di modelli pre-addestrati che possono essere scaricati e usati liberamente. Puoi anche caricare i tuoi modelli nell'Hub!
 
 <Tip>
-⚠️ Hugging Face Hub non è limitato ai soli modelli Transformer. Chiunque può condividere qualsiasi tipo di modello o dataset (<em>insieme di dati</em>)! <a href="https://huggingface.co/join">Crea un profilo huggingface.co</a> per approfittare di tutte le feature (<em>funzioni</em>) disponibili!
+⚠️ L'Hugging Face Hub non si limita ai soli modelli Transformer. Chiunque può condividere qualsiasi tipo di modello o dataset (<em>insieme di dati</em>)! <a href="https://huggingface.co/join">Crea un profilo huggingface.co</a> per approfittare di tutte le funzioni disponibili!
 </Tip>
 
-Prima di scoprire come i modelli Transformer funzionino dietro le quinte, diamo un occhio a qualche esempio di come questi possano essere utilizzati per risolvere alcuni problemi interessanti di NLP.
+Prima di scoprire come funzionino i modelli Transformer dietro le quinte, vediamo qualche esempio di come questi possano essere utilizzati per risolvere alcuni problemi interessanti di NLP.
 
 ## Lavorare con le pipeline
 
 <Youtube id="tiZFewofSLM" />
 
-L'oggetto più basilare della libreria 🤗 Transformer è la funzione `pipeline()`. Questa connette un modello con tutte le fasi necessarie di preprocessing e postprocessing, permettendoci così di fornire direttamente un qualsiasi testo come input e ottenere una risposta intelligibile:
+L'oggetto più basilare della libreria 🤗 Transformer è la funzione `pipeline()`. Questa connette un modello con tutte le fasi necessarie di preprocessing e postprocessing, permettendoci così di fornire un qualsiasi testo come input diretto e ottenere una risposta intelligibile:
 
 ```python
 from transformers import pipeline
@@ -61,11 +61,11 @@ classifier(
 
 Per default, questa pipeline seleziona un preciso modello pre-addestrato che è stato affinato per il sentiment analysis in inglese. Quando creiamo l'oggetto `classifier`, il modello viene scaricato e memorizzato nella cache. Se inizializziamo di nuovo il comando, verrà utilizzato il modello salvato nella cache e non ci sarà quindi bisogno di scaricare di nuovo il modello.
 
-Tre passaggi principali vengono coinvolti quando passiamo del testo in un pipeline:
+Tre passaggi principali sono coinvolti quando passiamo del testo in una pipeline:
 
 1. Il testo è pre-elaborato in un formato che il modello può capire.
 2. Gli input pre-elaborati vengono passati al modello.
-3. Le previsioni del modello sono post-elaborate in in un formato accessibile all'utilizzatore.
+3. Le previsioni del modello sono post-elaborate in un formato accessibile all'utilizzatore.
 
 
 Tra le [pipeline disponibili](https://huggingface.co/transformers/main_classes/pipelines.html) al momento ci sono:
@@ -84,7 +84,7 @@ Proviamo a vederne alcune!
 
 ## Classificazione zero-shot
 
-Cominceremo con l'affrontare un compito impegnativo che consiste nella classificazione di testi non etichettati. Si tratta di uno scenario comune in molti progetti pratici perché l'annotazione testuale richiede tempo e competenza settoriale. In questo caso d'uso, la pipeline `zero-shot-classification` è molto potente e permette di specificare le etichette da utilizzare per la classificazione, in modo da non dover fare affidamento sulle etichette del modello pre-addestrato. Abbiamo già visto come il modello riesca a classificare una frase utilizzando le etichette 'positiva' e 'negativa', ma è anche possibile classificare testi utilizzando una qualsiasi serie di etichette di tua scelta.
+Cominceremo con l'affrontare un compito impegnativo che consiste nella classificazione di testi non etichettati. Si tratta di uno scenario comune in molti progetti pratici perché l'annotazione testuale richiede tempo e competenza settoriale. In questo caso d'uso, la pipeline `zero-shot-classification` è molto potente e permette di specificare le etichette da utilizzare per la classificazione, in modo da non dover fare affidamento sulle etichette del modello pre-addestrato. Abbiamo già visto come il modello riesca a classificare una frase utilizzando le etichette 'positiva' e 'negativa', ma è anche possibile classificare testi utilizzando una qualsiasi serie di etichette di nostra scelta.
 
 ```python
 from transformers import pipeline
@@ -130,7 +130,7 @@ generator("In this course, we will teach you how to")
                     'HTTP'}]
 ```
 
-Usando l'argomento `num_return_sequences` puoi controllare quante sequenze diverse vengono generate e, con l'argomento `max_length` la lunghezza totale dell'output testuale.
+Usando l'argomento `num_return_sequences` puoi controllare quante sequenze diverse vengono generate e, con l'argomento `max_length`, la lunghezza totale dell'output testuale.
 
 <Tip>
 
@@ -141,7 +141,7 @@ Usando l'argomento `num_return_sequences` puoi controllare quante sequenze diver
 
 ## Utilizzo di un qualsiasi modello dell'Hub in una pipeline
 
-Gli esempi precedenti utilizzavano il modello di default per il compito dato, ma puoi anche scegliere un modello particolare dell'Hub da utilizzare in una pipeline per un compito specifico come ad esempio la generazione testuale. Vai al [Model Hub](https://huggingface.co/models) e clicca sull'etichetta corrispondente a destra per mostrare solo i modelli supportati per il compito in questione. Dovresti trovarti in una pagina come [questa](https://huggingface.co/models?pipeline_tag=text-generation).
+Gli esempi precedenti utilizzavano il modello di default per il compito dato, ma puoi anche scegliere un modello particolare dell'Hub da utilizzare in una pipeline per un compito specifico, come ad esempio la generazione testuale. Vai al [Model Hub](https://huggingface.co/models) e clicca sull'etichetta corrispondente a destra, in modo da mostrare solo i modelli supportati per il compito in questione. Dovresti ritrovarti in una pagina come [questa](https://huggingface.co/models?pipeline_tag=text-generation).
 
 Proviamo il modello [`distilgpt2`](https://huggingface.co/distilgpt2)! Ecco come caricarlo nella pipeline usata in precedenza:
 
@@ -164,19 +164,19 @@ generator(
                     'time and real'}]
 ```
 
-Puoi affinare la ricerca di un modello cliccando sulle etichette corrispondenti alle lingue, e scegliere in seguito un modello che generi testo in un'altra lingua. Il Model Hub contiene anche checkpoint per modelli multilingue che supportano diverse lingue.
+Puoi affinare la ricerca di un modello cliccando sulle etichette corrispondenti alle lingue, e scegliere in seguito un modello che generi testo in un'altra lingua. Il Model Hub contiene anche checkpoint per modelli multilingue che supportano numerose lingue.
 
 Quando avrai selezionato un modello cliccando su di esso, vedrai che esiste un widget che ti permette di provarlo direttamente online. In questo modo, puoi testare velocemente le capacità del modello prima di scaricarlo.
 
 <Tip>
 
-✏️ **Provaci anche tu!** Usa i filtri per trovare un modello di generazione testuale per un'altra lingua. Sentiti liberp/a di divertirti con il widget e usalo in una pipeline!
+✏️ **Provaci anche tu!** Usa i filtri per trovare un modello di generazione testuale per un'altra lingua. Sentiti libero/a di divertirti con il widget e usalo in una pipeline!
 
 </Tip>
 
 ### La Inference API
 
-Tutti i modelli possono essere testati direttamente attraverso il tuo browser utilizzando l'Inference API, che trovi nel [sito](https://huggingface.co/) di Hugging Face. Puoi divertirti con il modello direttamente in questa pagina, inserendo testo personalizzato e osservando poi come il modello processi i dati fornitigli.
+Tutti i modelli possono essere testati direttamente attraverso il tuo browser utilizzando l'Inference API che trovi nel [sito](https://huggingface.co/) di Hugging Face. Puoi divertirti con il modello direttamente in questa pagina, inserendo testo personalizzato e osservando come il modello processi i dati fornitigli.
 
 La Inference API che alimenta il widget è disponibile anche come prodotto a pagamento, il che è comodo se ne hai bisogno per i tuoi flussi di lavoro. Vedi la [pagina dei prezzi](https://huggingface.co/pricing) per maggiori informazioni.
 
@@ -202,7 +202,7 @@ unmasker("This course will teach you all about <mask> models.", top_k=2)
   'token_str': ' computational'}]
 ```
 
-L'argomento `top_k` gestisce il numero di possibilità che vuoi mostrare. Nota che qui il modello inserisce la `<mask>` word speciale, la quale viene spesso chiamata *mask token*. ALtri modelli di tipo mask-filling potrebbero avere mask token diversi, quindi è sempre bene verificare quale sia la corretta mask word quando esploriamo nuovi modelli. Un modo per verificarla consiste nel trovare la mask word che viene utilizzata nel widget.
+L'argomento `top_k` gestisce il numero di possibilità che vuoi mostrare. Nota che qui il modello inserisce la `<mask>` word speciale, la quale viene spesso chiamata *mask token*. Altri modelli di tipo mask-filling potrebbero avere mask token diversi, quindi è sempre bene verificare quale sia la corretta mask word quando esploriamo nuovi modelli. Un modo per verificarla consiste nel trovare la mask word utilizzata nel widget.
 
 <Tip>
 
@@ -212,7 +212,7 @@ L'argomento `top_k` gestisce il numero di possibilità che vuoi mostrare. Nota c
 
 ## Riconoscimento delle entità nominate
 
-Il riconoscimento di entità nominate (*Named entity recognition*, NER) è un compito in cui il modello deve determinare quali parti dell'input testuale corrispondono a entità quali persone, località, o organizzazioni. Guardiamo a un esempio:
+Il riconoscimento delle entità nominate (*Named entity recognition*, NER) è un compito in cui il modello deve determinare quali parti dell'input testuale corrispondono a entità quali persone, località, o organizzazioni. Vediamo un esempio:
 
 ```python
 from transformers import pipeline
@@ -230,7 +230,7 @@ ner("My name is Sylvain and I work at Hugging Face in Brooklyn.")
 
 Qui il modello ha correttamente identificato che Sylvain è una persona (PER), Hugging Face un'organizzazione (ORG), e Brooklyn una località (LOC).
 
-Passiamo l'opzione `grouped_entities=True` nella funzione di creazione della pipeline per raggruppare le parti frasali che corrispondono alla stessa entità: qui il modello raggruppa correttamente "Hugging" e "Face" come singola organizzazione, nonostante il nome sia formato da più parole. A dire in vero, come vedremo nel prossimo capitolo, il preprocessing divide perfino alcune parole in parti più piccole. Ad esempio, `Sylvain` viene suddiviso in quattro parti: `S`, `##yl`, `##va`, and `##in`. Al momento del post-processing, la pipeline ha raggruppato le parti con successo.
+Passiamo l'opzione `grouped_entities=True` nella funzione di creazione della pipeline per raggruppare le parti frasali che corrispondono alla stessa entità: qui il modello raggruppa correttamente "Hugging" e "Face" come singola organizzazione, nonostante il nome sia formato da più parole. A dire il vero, come vedremo nel prossimo capitolo, il preprocessing divide perfino alcune parole in parti più piccole. Ad esempio, `Sylvain` viene suddiviso in quattro parti: `S`, `##yl`, `##va`, e `##in`. Al momento del post-processing, la pipeline raggruppa le parti con successo.
 
 <Tip>
 
@@ -256,7 +256,7 @@ question_answerer(
 {'score': 0.6385916471481323, 'start': 33, 'end': 45, 'answer': 'Hugging Face'}
 ```
 
-Nota che questa pipeline non genera risposte ma estrae informazioni da un contesto prestabilito.
+Nota che questa pipeline non genera risposte ma estrae informazioni da un contesto fornito.
 
 ## Riassunto
 
@@ -326,4 +326,4 @@ Come per le funzioni di generazione testuale e riassunto, è possibile specifica
 
 </Tip>
 
-Finora abbiamo mostrato pipeline a solo scopo dimostrativo. Tali pipeline sono stati programmati per compiti ben specifici e non sono in grado di eseguire variazioni di questi ultimi. Nel prossimo capitolo, imparerai cosa si nasconde dentro la funzione `pipeline()` e come personalizzarne il comportamento.
+Finora abbiamo mostrato pipeline a solo scopo dimostrativo. Tali pipeline sono state programmate per compiti ben specifici e non sono in grado di eseguire variazioni di questi ultimi. Nel prossimo capitolo, imparerai cosa si nasconde dentro la funzione `pipeline()` e come personalizzarne il comportamento.

From 309f913e1ac78687541ac530f3c42187a9dbde52 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 3 May 2022 21:55:35 +0100
Subject: [PATCH 090/127] Creates 4.mdx

---
 chapters/it/chapter1/4.mdx | 171 +++++++++++++++++++++++++++++++++++++
 1 file changed, 171 insertions(+)
 create mode 100644 chapters/it/chapter1/4.mdx

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
new file mode 100644
index 000000000..6d286a42e
--- /dev/null
+++ b/chapters/it/chapter1/4.mdx
@@ -0,0 +1,171 @@
+# How do Transformers work?
+
+In this section, we will take a high-level look at the architecture of Transformer models.
+
+## A bit of Transformer history
+
+Here are some reference points in the (short) history of Transformer models:
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers_chrono.svg" alt="A brief chronology of Transformers models.">
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers_chrono-dark.svg" alt="A brief chronology of Transformers models.">
+</div>
+
+The [Transformer architecture](https://arxiv.org/abs/1706.03762) was introduced in June 2017. The focus of the original research was on translation tasks. This was followed by the introduction of several influential models, including:
+
+- **June 2018**: [GPT](https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf), the first pretrained Transformer model, used for fine-tuning on various NLP tasks and obtained state-of-the-art results
+
+- **October 2018**: [BERT](https://arxiv.org/abs/1810.04805), another large pretrained model, this one designed to produce better summaries of sentences (more on this in the next chapter!)
+
+- **February 2019**: [GPT-2](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf), an improved (and bigger) version of GPT that was not immediately publicly released due to ethical concerns
+
+- **October 2019**: [DistilBERT](https://arxiv.org/abs/1910.01108), a distilled version of BERT that is 60% faster, 40% lighter in memory, and still retains 97% of BERT's performance
+
+- **October 2019**: [BART](https://arxiv.org/abs/1910.13461) and [T5](https://arxiv.org/abs/1910.10683), two large pretrained models using the same architecture as the original Transformer model (the first to do so)
+
+- **May 2020**, [GPT-3](https://arxiv.org/abs/2005.14165), an even bigger version of GPT-2 that is able to perform well on a variety of tasks without the need for fine-tuning (called _zero-shot learning_)
+
+This list is far from comprehensive, and is just meant to highlight a few of the different kinds of Transformer models. Broadly, they can be grouped into three categories:
+
+- GPT-like (also called _auto-regressive_ Transformer models)
+- BERT-like (also called _auto-encoding_ Transformer models) 
+- BART/T5-like (also called _sequence-to-sequence_ Transformer models)
+
+We will dive into these families in more depth later on.
+
+## Transformers are language models
+
+All the Transformer models mentioned above (GPT, BERT, BART, T5, etc.) have been trained as *language models*. This means they have been trained on large amounts of raw text in a self-supervised fashion. Self-supervised learning is a type of training in which the objective is automatically computed from the inputs of the model. That means that humans are not needed to label the data!
+
+This type of model develops a statistical understanding of the language it has been trained on, but it's not very useful for specific practical tasks. Because of this, the general pretrained model then goes through a process called *transfer learning*. During this process, the model is fine-tuned in a supervised way -- that is, using human-annotated labels -- on a given task.
+
+An example of a task is predicting the next word in a sentence having read the *n* previous words. This is called *causal language modeling* because the output depends on the past and present inputs, but not the future ones.
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/causal_modeling.svg" alt="Example of causal language modeling in which the next word from a sentence is predicted.">
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/causal_modeling-dark.svg" alt="Example of causal language modeling in which the next word from a sentence is predicted.">
+</div>
+
+Another example is *masked language modeling*, in which the model predicts a masked word in the sentence.
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/masked_modeling.svg" alt="Example of masked language modeling in which a masked word from a sentence is predicted.">
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/masked_modeling-dark.svg" alt="Example of masked language modeling in which a masked word from a sentence is predicted.">
+</div>
+
+## Transformers are big models
+
+Apart from a few outliers (like DistilBERT), the general strategy to achieve better performance is by increasing the models' sizes as well as the amount of data they are pretrained on.
+
+<div class="flex justify-center">
+<img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/model_parameters.png" alt="Number of parameters of recent Transformers models" width="90%">
+</div>
+
+Unfortunately, training a model, especially a large one, requires a large amount of data. This becomes very costly in terms of time and compute resources. It even translates to environmental impact, as can be seen in the following graph.
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/carbon_footprint.svg" alt="The carbon footprint of a large language model.">
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/carbon_footprint-dark.svg" alt="The carbon footprint of a large language model.">
+</div>
+
+<Youtube id="ftWlj4FBHTg"/>
+
+And this is showing a project for a (very big) model led by a team consciously trying to reduce the environmental impact of pretraining. The footprint of running lots of trials to get the best hyperparameters would be even higher.
+
+Imagine if each time a research team, a student organization, or a company wanted to train a model, it did so from scratch. This would lead to huge, unnecessary global costs!
+
+This is why sharing language models is paramount: sharing the trained weights and building on top of already trained weights reduces the overall compute cost and carbon footprint of the community.
+
+
+## Transfer Learning
+
+<Youtube id="BqqfQnyjmgg" />
+
+*Pretraining* is the act of training a model from scratch: the weights are randomly initialized, and the training starts without any prior knowledge.
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/pretraining.svg" alt="The pretraining of a language model is costly in both time and money.">
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/pretraining-dark.svg" alt="The pretraining of a language model is costly in both time and money.">
+</div>
+
+This pretraining is usually done on very large amounts of data. Therefore, it requires a very large corpus of data, and training can take up to several weeks.
+
+*Fine-tuning*, on the other hand, is the training done **after** a model has been pretrained. To perform fine-tuning, you first acquire a pretrained language model, then perform additional training with a dataset specific to your task. Wait -- why not simply train directly for the final task? There are a couple of reasons:
+
+*  The pretrained model was already trained on a dataset that has some similarities with the fine-tuning dataset. The fine-tuning process is thus able to take advantage of knowledge acquired by the initial model during pretraining (for instance, with NLP problems, the pretrained model will have some kind of statistical understanding of the language you are using for your task). 
+*  Since the pretrained model was already trained on lots of data, the fine-tuning requires way less data to get decent results.
+*  For the same reason, the amount of time and resources needed to get good results are much lower.
+
+For example, one could leverage a pretrained model trained on the English language and then fine-tune it on an arXiv corpus, resulting in a science/research-based model. The fine-tuning will only require a limited amount of data: the knowledge the pretrained model has acquired is "transferred," hence the term *transfer learning*.
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/finetuning.svg" alt="The fine-tuning of a language model is cheaper than pretraining in both time and money.">
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/finetuning-dark.svg" alt="The fine-tuning of a language model is cheaper than pretraining in both time and money.">
+</div>
+
+Fine-tuning a model therefore has lower time, data, financial, and environmental costs. It is also quicker and easier to iterate over different fine-tuning schemes, as the training is less constraining than a full pretraining.
+
+This process will also achieve better results than training from scratch (unless you have lots of data), which is why you should always try to leverage a pretrained model -- one as close as possible to the task you have at hand -- and fine-tune it.
+
+## General architecture
+
+In this section, we'll go over the general architecture of the Transformer model. Don't worry if you don't understand some of the concepts; there are detailed sections later covering each of the components.
+
+<Youtube id="H39Z_720T5s" />
+
+## Introduction
+
+The model is primarily composed of two blocks:
+
+* **Encoder (left)**: The encoder receives an input and builds a representation of it (its features). This means that the model is optimized to acquire understanding from the input.
+* **Decoder (right)**: The decoder uses the encoder's representation (features) along with other inputs to generate a target sequence. This means that the model is optimized for generating outputs.
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers_blocks.svg" alt="Architecture of a Transformers models">
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers_blocks-dark.svg" alt="Architecture of a Transformers models">
+</div>
+
+Each of these parts can be used independently, depending on the task: 
+
+* **Encoder-only models**: Good for tasks that require understanding of the input, such as sentence classification and named entity recognition.
+* **Decoder-only models**: Good for generative tasks such as text generation.
+* **Encoder-decoder models** or **sequence-to-sequence models**: Good for generative tasks that require an input, such as translation or summarization.
+
+We will dive into those architectures independently in later sections.
+
+## Attention layers
+
+A key feature of Transformer models is that they are built with special layers called *attention layers*. In fact, the title of the paper introducing the Transformer architecture was ["Attention Is All You Need"](https://arxiv.org/abs/1706.03762)! We will explore the details of attention layers later in the course; for now, all you need to know is that this layer will tell the model to pay specific attention to certain words in the sentence you passed it (and more or less ignore the others) when dealing with the representation of each word.
+
+To put this into context, consider the task of translating text from English to French. Given the input "You like this course", a translation model will need to also attend to the adjacent word "You" to get the proper translation for the word "like", because in French the verb "like" is conjugated differently depending on the subject. The rest of the sentence, however, is not useful for the translation of that word. In the same vein, when translating "this" the model will also need to pay attention to the word "course", because "this" translates differently depending on whether the associated noun is masculine or feminine. Again, the other words in the sentence will not matter for the translation of "this". With more complex sentences (and more complex grammar rules), the model would need to pay special attention to words that might appear farther away in the sentence to properly translate each word.
+
+The same concept applies to any task associated with natural language: a word by itself has a meaning, but that meaning is deeply affected by the context, which can be any other word (or words) before or after the word being studied.
+
+Now that you have an idea of what attention layers are all about, let's take a closer look at the Transformer architecture.
+
+## The original architecture
+
+The Transformer architecture was originally designed for translation. During training, the encoder receives inputs (sentences) in a certain language, while the decoder receives the same sentences in the desired target language. In the encoder, the attention layers can use all the words in a sentence (since, as we just saw, the translation of a given word can be dependent on what is after as well as before it in the sentence). The decoder, however, works sequentially and can only pay attention to the words in the sentence that it has already translated (so, only the words before the word currently being generated). For example, when we have predicted the first three words of the translated target, we give them to the decoder  which then uses all the inputs of the encoder to try to predict the fourth word.
+
+To speed things up during training (when the model has access to target sentences), the decoder is fed the whole target, but it is not allowed to use future words (if it had access to the word at position 2 when trying to predict the word at position 2, the problem would not be very hard!). For instance, when trying to predict the fourth word, the attention layer will only have access to the words in positions 1 to 3.
+
+The original Transformer architecture looked like this, with the encoder on the left and the decoder on the right:
+
+<div class="flex justify-center">
+<img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers.svg" alt="Architecture of a Transformers models">
+<img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers-dark.svg" alt="Architecture of a Transformers models">
+</div>
+
+Note that the first attention layer in a decoder block pays attention to all (past) inputs to the decoder, but the second attention layer uses the output of the encoder. It can thus access the whole input sentence to best predict the current word. This is very useful as different languages can have grammatical rules that put the words in different orders, or some context provided later in the sentence may be helpful to determine the best translation of a given word.
+
+The *attention mask* can also be used in the encoder/decoder to prevent the model from paying attention to some special words -- for instance, the special padding word used to make all the inputs the same length when batching together sentences.
+
+##  Architectures vs. checkpoints
+
+As we dive into Transformer models in this course, you'll see mentions of *architectures* and *checkpoints* as well as *models*. These terms all have slightly different meanings: 
+
+* **Architecture**: This is the skeleton of the model -- the definition of each layer and each operation that happens within the model. 
+* **Checkpoints**: These are the weights that will be loaded in a given architecture.
+* **Model**: This is an umbrella term that isn't as precise as "architecture" or "checkpoint": it can mean both. This course will specify *architecture* or *checkpoint* when it matters to reduce ambiguity.
+
+For example, BERT is an architecture while `bert-base-cased`, a set of weights trained by the Google team for the first release of BERT, is a checkpoint. However, one can say "the BERT model" and "the `bert-base-cased` model."

From 32c1bcbbc949138c4b4c339ba9a4f73489fbe7c1 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 3 May 2022 21:56:45 +0100
Subject: [PATCH 091/127] Updates ToC

---
 chapters/it/_toctree.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/chapters/it/_toctree.yml b/chapters/it/_toctree.yml
index d9c034499..fcb79f232 100644
--- a/chapters/it/_toctree.yml
+++ b/chapters/it/_toctree.yml
@@ -11,3 +11,5 @@
     title: Natural Language Processing
   - local: chapter1/3
     title: Cosa fanno i Transformer?
+  - local: chapter1/4
+    title: Come funzionano i Transformer?

From 7b9c2047de7dacf504778f7fa7e56698645c3c67 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 3 May 2022 21:59:16 +0100
Subject: [PATCH 092/127] Translates title

---
 chapters/it/chapter1/4.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index 6d286a42e..3d57c0076 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -1,4 +1,4 @@
-# How do Transformers work?
+# Come funzionano i Transformer?
 
 In this section, we will take a high-level look at the architecture of Transformer models.
 

From bf44e3c20cce01f1bdff4966d35decc3903c2fdf Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 3 May 2022 22:03:45 +0100
Subject: [PATCH 093/127] Updates translation

---
 chapters/it/chapter1/4.mdx | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index 3d57c0076..f293b49b2 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -1,29 +1,29 @@
 # Come funzionano i Transformer?
 
-In this section, we will take a high-level look at the architecture of Transformer models.
+In questa sezione, sorvoleremo in maniera avanzata l'architettura dei modelli Transformer.
 
-## A bit of Transformer history
+## Un po' di storia dei Transformer
 
-Here are some reference points in the (short) history of Transformer models:
+Ecco alcuni punti di riferimento nella (breve) storia dei modelli Transformer:
 
 <div class="flex justify-center">
 <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers_chrono.svg" alt="A brief chronology of Transformers models.">
 <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers_chrono-dark.svg" alt="A brief chronology of Transformers models.">
 </div>
 
-The [Transformer architecture](https://arxiv.org/abs/1706.03762) was introduced in June 2017. The focus of the original research was on translation tasks. This was followed by the introduction of several influential models, including:
+L'[architettura Transformer](https://arxiv.org/abs/1706.03762) è stata introdotta in giugno 2017. Il focus della ricerca di partenza era sui compiti di traduzione. A questa seguì l'introduzione di numerosi modelli influenti, tra cui figurano:
 
-- **June 2018**: [GPT](https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf), the first pretrained Transformer model, used for fine-tuning on various NLP tasks and obtained state-of-the-art results
+- **giugno 2018**: [GPT](https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf), the first pretrained Transformer model, used for fine-tuning on various NLP tasks and obtained state-of-the-art results
 
-- **October 2018**: [BERT](https://arxiv.org/abs/1810.04805), another large pretrained model, this one designed to produce better summaries of sentences (more on this in the next chapter!)
+- **ottobre 2018**: [BERT](https://arxiv.org/abs/1810.04805), another large pretrained model, this one designed to produce better summaries of sentences (more on this in the next chapter!)
 
-- **February 2019**: [GPT-2](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf), an improved (and bigger) version of GPT that was not immediately publicly released due to ethical concerns
+- **febbraio 2019**: [GPT-2](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf), an improved (and bigger) version of GPT that was not immediately publicly released due to ethical concerns
 
-- **October 2019**: [DistilBERT](https://arxiv.org/abs/1910.01108), a distilled version of BERT that is 60% faster, 40% lighter in memory, and still retains 97% of BERT's performance
+- **ottobre 2019**: [DistilBERT](https://arxiv.org/abs/1910.01108), a distilled version of BERT that is 60% faster, 40% lighter in memory, and still retains 97% of BERT's performance
 
-- **October 2019**: [BART](https://arxiv.org/abs/1910.13461) and [T5](https://arxiv.org/abs/1910.10683), two large pretrained models using the same architecture as the original Transformer model (the first to do so)
+- **ottobre 2019**: [BART](https://arxiv.org/abs/1910.13461) and [T5](https://arxiv.org/abs/1910.10683), two large pretrained models using the same architecture as the original Transformer model (the first to do so)
 
-- **May 2020**, [GPT-3](https://arxiv.org/abs/2005.14165), an even bigger version of GPT-2 that is able to perform well on a variety of tasks without the need for fine-tuning (called _zero-shot learning_)
+- **maggio 2020**, [GPT-3](https://arxiv.org/abs/2005.14165), an even bigger version of GPT-2 that is able to perform well on a variety of tasks without the need for fine-tuning (called _zero-shot learning_)
 
 This list is far from comprehensive, and is just meant to highlight a few of the different kinds of Transformer models. Broadly, they can be grouped into three categories:
 

From deac45a99277f70bf9bd331a12e87c12000c382c Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 3 May 2022 22:14:18 +0100
Subject: [PATCH 094/127] Updates translation

---
 chapters/it/chapter1/4.mdx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index f293b49b2..63ef5e040 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -13,11 +13,11 @@ Ecco alcuni punti di riferimento nella (breve) storia dei modelli Transformer:
 
 L'[architettura Transformer](https://arxiv.org/abs/1706.03762) è stata introdotta in giugno 2017. Il focus della ricerca di partenza era sui compiti di traduzione. A questa seguì l'introduzione di numerosi modelli influenti, tra cui figurano:
 
-- **giugno 2018**: [GPT](https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf), the first pretrained Transformer model, used for fine-tuning on various NLP tasks and obtained state-of-the-art results
+- **giugno 2018**: [GPT](https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf), il primo modello Transformer pre-addestrato, usato per il fine-tuning su diversi compiti di NLP e capace di ottenere risultati all'avanguardia
 
-- **ottobre 2018**: [BERT](https://arxiv.org/abs/1810.04805), another large pretrained model, this one designed to produce better summaries of sentences (more on this in the next chapter!)
+- **ottobre 2018**: [BERT](https://arxiv.org/abs/1810.04805), un altro ampio modello pre-addestrato, questa volta progettato per produrre riassunti di frasi migliori (ne scopriremo di più nel prossimo capitolo!)
 
-- **febbraio 2019**: [GPT-2](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf), an improved (and bigger) version of GPT that was not immediately publicly released due to ethical concerns
+- **febbraio 2019**: [GPT-2](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf), una versione (migliorata e ingrandita) di GPT che non fu distribuita immediatamente al pubblico a causa di preoccupazioni etiche
 
 - **ottobre 2019**: [DistilBERT](https://arxiv.org/abs/1910.01108), a distilled version of BERT that is 60% faster, 40% lighter in memory, and still retains 97% of BERT's performance
 

From 036e45076251331af3356f011086e0c613ea476d Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Thu, 5 May 2022 21:56:07 +0100
Subject: [PATCH 095/127] Updates translation

---
 chapters/it/chapter1/4.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index 63ef5e040..c83252c5e 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -21,7 +21,7 @@ L'[architettura Transformer](https://arxiv.org/abs/1706.03762) è stata introdot
 
 - **ottobre 2019**: [DistilBERT](https://arxiv.org/abs/1910.01108), a distilled version of BERT that is 60% faster, 40% lighter in memory, and still retains 97% of BERT's performance
 
-- **ottobre 2019**: [BART](https://arxiv.org/abs/1910.13461) and [T5](https://arxiv.org/abs/1910.10683), two large pretrained models using the same architecture as the original Transformer model (the first to do so)
+- **ottobre 2019**: [BART](https://arxiv.org/abs/1910.13461) and [T5](https://arxiv.org/abs/1910.10683), due grossi modelli pre-addestrati che utilizzano la stessa architettura del modello Transformer originale (nonché i primi a farlo)
 
 - **maggio 2020**, [GPT-3](https://arxiv.org/abs/2005.14165), an even bigger version of GPT-2 that is able to perform well on a variety of tasks without the need for fine-tuning (called _zero-shot learning_)
 

From ed7e2da023c287c422d7639d398bdd6b9ef66228 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Wed, 11 May 2022 22:49:44 +0100
Subject: [PATCH 096/127] Create 5.mdx

---
 chapters/it/chapter1/5.mdx | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 chapters/it/chapter1/5.mdx

diff --git a/chapters/it/chapter1/5.mdx b/chapters/it/chapter1/5.mdx
new file mode 100644
index 000000000..8b1378917
--- /dev/null
+++ b/chapters/it/chapter1/5.mdx
@@ -0,0 +1 @@
+

From 9ef61d63b03661d543e06868bdd231d85eeffaf1 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Wed, 11 May 2022 22:50:52 +0100
Subject: [PATCH 097/127] Updates 5.mdx

---
 chapters/it/chapter1/5.mdx | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/chapters/it/chapter1/5.mdx b/chapters/it/chapter1/5.mdx
index 8b1378917..1c707033b 100644
--- a/chapters/it/chapter1/5.mdx
+++ b/chapters/it/chapter1/5.mdx
@@ -1 +1,17 @@
+# Encoder models
 
+<Youtube id="MUqNwgPjJvQ" />
+
+Encoder models use only the encoder of a Transformer model. At each stage, the attention layers can access all the words in the initial sentence. These models are often characterized as having "bi-directional" attention, and are often called *auto-encoding models*.
+
+The pretraining of these models usually revolves around somehow corrupting a given sentence (for instance, by masking random words in it) and tasking the model with finding or reconstructing the initial sentence.
+
+Encoder models are best suited for tasks requiring an understanding of the full sentence, such as sentence classification, named entity recognition (and more generally word classification), and extractive question answering.
+
+Representatives of this family of models include:
+
+- [ALBERT](https://huggingface.co/transformers/model_doc/albert.html)
+- [BERT](https://huggingface.co/transformers/model_doc/bert.html)
+- [DistilBERT](https://huggingface.co/transformers/model_doc/distilbert.html)
+- [ELECTRA](https://huggingface.co/transformers/model_doc/electra.html)
+- [RoBERTa](https://huggingface.co/transformers/model_doc/roberta.html)

From ee5ecb591b42c44086c1cc1dd9b5a98fbfc35929 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Wed, 11 May 2022 22:51:47 +0100
Subject: [PATCH 098/127] Translates title

---
 chapters/it/chapter1/5.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/chapter1/5.mdx b/chapters/it/chapter1/5.mdx
index 1c707033b..d08266396 100644
--- a/chapters/it/chapter1/5.mdx
+++ b/chapters/it/chapter1/5.mdx
@@ -1,4 +1,4 @@
-# Encoder models
+# Modelli encoder
 
 <Youtube id="MUqNwgPjJvQ" />
 

From 8583909eb548e686fe9dad70256e27267935fade Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Wed, 11 May 2022 22:54:15 +0100
Subject: [PATCH 099/127] Adds 1/5

---
 chapters/it/_toctree.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/chapters/it/_toctree.yml b/chapters/it/_toctree.yml
index fcb79f232..f85e37c97 100644
--- a/chapters/it/_toctree.yml
+++ b/chapters/it/_toctree.yml
@@ -13,3 +13,5 @@
     title: Cosa fanno i Transformer?
   - local: chapter1/4
     title: Come funzionano i Transformer?
+  - local: chapter1/5
+    title: Encoder models

From 1dbabeb2241c036dcc8ba7a0554df246e38b200a Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 30 May 2022 20:27:05 +0100
Subject: [PATCH 100/127] Update _toctree.yml

---
 chapters/it/_toctree.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/_toctree.yml b/chapters/it/_toctree.yml
index f85e37c97..ff7073d36 100644
--- a/chapters/it/_toctree.yml
+++ b/chapters/it/_toctree.yml
@@ -14,4 +14,4 @@
   - local: chapter1/4
     title: Come funzionano i Transformer?
   - local: chapter1/5
-    title: Encoder models
+    title: Modelli Encoder

From 3d378d307b8db116c736a86a0b9632e3a25f3e34 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 30 May 2022 20:27:42 +0100
Subject: [PATCH 101/127] Update _toctree.yml

---
 chapters/it/_toctree.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/_toctree.yml b/chapters/it/_toctree.yml
index ff7073d36..87ed3a6d2 100644
--- a/chapters/it/_toctree.yml
+++ b/chapters/it/_toctree.yml
@@ -14,4 +14,4 @@
   - local: chapter1/4
     title: Come funzionano i Transformer?
   - local: chapter1/5
-    title: Modelli Encoder
+    title: Modelli encoder

From 5b4b3c432277444abee56a9a31a2d50ff76960db Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 30 May 2022 20:39:14 +0100
Subject: [PATCH 102/127] Update 5.mdx

---
 chapters/it/chapter1/5.mdx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chapters/it/chapter1/5.mdx b/chapters/it/chapter1/5.mdx
index d08266396..3ff4b2080 100644
--- a/chapters/it/chapter1/5.mdx
+++ b/chapters/it/chapter1/5.mdx
@@ -2,13 +2,13 @@
 
 <Youtube id="MUqNwgPjJvQ" />
 
-Encoder models use only the encoder of a Transformer model. At each stage, the attention layers can access all the words in the initial sentence. These models are often characterized as having "bi-directional" attention, and are often called *auto-encoding models*.
+I modelli encoder utilizzano solo l'encoder di un modello Transformer. In ogni fase, the attention layers can access all the words in the initial sentence. These models are often characterized as having "bi-directional" attention, e vengono spesso chiamati *auto-encoding models*.
 
 The pretraining of these models usually revolves around somehow corrupting a given sentence (for instance, by masking random words in it) and tasking the model with finding or reconstructing the initial sentence.
 
 Encoder models are best suited for tasks requiring an understanding of the full sentence, such as sentence classification, named entity recognition (and more generally word classification), and extractive question answering.
 
-Representatives of this family of models include:
+Alcuni esempi di modelli di questo tipo includono:
 
 - [ALBERT](https://huggingface.co/transformers/model_doc/albert.html)
 - [BERT](https://huggingface.co/transformers/model_doc/bert.html)

From 4c47ac349096b77e2175bf8576776a08ae6768f8 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 30 May 2022 20:53:44 +0100
Subject: [PATCH 103/127] Final update

---
 chapters/it/chapter1/5.mdx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/chapters/it/chapter1/5.mdx b/chapters/it/chapter1/5.mdx
index 3ff4b2080..4c90404a2 100644
--- a/chapters/it/chapter1/5.mdx
+++ b/chapters/it/chapter1/5.mdx
@@ -2,11 +2,11 @@
 
 <Youtube id="MUqNwgPjJvQ" />
 
-I modelli encoder utilizzano solo l'encoder di un modello Transformer. In ogni fase, the attention layers can access all the words in the initial sentence. These models are often characterized as having "bi-directional" attention, e vengono spesso chiamati *auto-encoding models*.
+I modelli encoder utilizzano solo l'encoder di un modello Transformer. In ogni fase, i layer di attenzione hanno accesso a tutte le parole della frase di partenza. Questi modelli sono spesso caratterizzati come aventi attenzione "bi-direzionale" e chiamati *auto-encoding models*.
 
-The pretraining of these models usually revolves around somehow corrupting a given sentence (for instance, by masking random words in it) and tasking the model with finding or reconstructing the initial sentence.
+Solitamente, il pre-addestramento di questi modelli consiste nel corrompere una determinata frase (ad esempio, nascondendone casualmente alcune parole) e incaricare il modello di ritrovare o ricostruire la frase di partenza.
 
-Encoder models are best suited for tasks requiring an understanding of the full sentence, such as sentence classification, named entity recognition (and more generally word classification), and extractive question answering.
+I modelli encoder sono particolarmente appropriati per compiti che richiedono la comprensione di frasi intere, quali la classificazione di frasi, il riconoscimento delle entità nominate (e, in senso più ampio, la classificazione di parole), e l'estrazione di risposte da un contesto.
 
 Alcuni esempi di modelli di questo tipo includono:
 

From 1a91250bd8a19a1528c3810a012463c1ed5aa227 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 30 May 2022 21:10:12 +0100
Subject: [PATCH 104/127] Update 4.mdx

---
 chapters/it/chapter1/4.mdx | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index c83252c5e..1ce71bc24 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -1,6 +1,6 @@
 # Come funzionano i Transformer?
 
-In questa sezione, sorvoleremo in maniera avanzata l'architettura dei modelli Transformer.
+In questa sezione, daremo uno sguardo d'insieme all'architettura dei modelli Transformer.
 
 ## Un po' di storia dei Transformer
 
@@ -19,17 +19,17 @@ L'[architettura Transformer](https://arxiv.org/abs/1706.03762) è stata introdot
 
 - **febbraio 2019**: [GPT-2](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf), una versione (migliorata e ingrandita) di GPT che non fu distribuita immediatamente al pubblico a causa di preoccupazioni etiche
 
-- **ottobre 2019**: [DistilBERT](https://arxiv.org/abs/1910.01108), a distilled version of BERT that is 60% faster, 40% lighter in memory, and still retains 97% of BERT's performance
+- **ottobre 2019**: [DistilBERT](https://arxiv.org/abs/1910.01108), una versione distillata di BERT che è il 60% più rapida, il 40% più leggera in memoria, e conserva comunque il 97% delle prestazioni di BERT
 
-- **ottobre 2019**: [BART](https://arxiv.org/abs/1910.13461) and [T5](https://arxiv.org/abs/1910.10683), due grossi modelli pre-addestrati che utilizzano la stessa architettura del modello Transformer originale (nonché i primi a farlo)
+- **ottobre 2019**: [BART](https://arxiv.org/abs/1910.13461) e [T5](https://arxiv.org/abs/1910.10683), due grossi modelli pre-addestrati che utilizzano la stessa architettura del modello Transformer originale (nonché i primi a farlo)
 
-- **maggio 2020**, [GPT-3](https://arxiv.org/abs/2005.14165), an even bigger version of GPT-2 that is able to perform well on a variety of tasks without the need for fine-tuning (called _zero-shot learning_)
+- **maggio 2020**, [GPT-3](https://arxiv.org/abs/2005.14165), una versione ancora più grande di GPT-2, con buone prestazioni in vari compiti e nessun bisogno di fine-tuning (il cosiddetto _zero-shot learning_)
 
-This list is far from comprehensive, and is just meant to highlight a few of the different kinds of Transformer models. Broadly, they can be grouped into three categories:
+This list is far from comprehensive, and is just meant to highlight a few of the different kinds of Transformer models. In genere, è possibile raggrupparli in tre categorie:
 
-- GPT-like (also called _auto-regressive_ Transformer models)
-- BERT-like (also called _auto-encoding_ Transformer models) 
-- BART/T5-like (also called _sequence-to-sequence_ Transformer models)
+- Modelli in stile GPT (detti anche modelli Transformer _auto-regressive_)
+- Modelli in stile BERT (detti anche modelli Transformer _auto-encoding_) 
+- Modelli in stile BART/T5 (detti anche modelli Transformer _sequence-to-sequence_)
 
 We will dive into these families in more depth later on.
 

From 854e13b3d766b4fb046525205b781a15b421748d Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 30 May 2022 21:19:10 +0100
Subject: [PATCH 105/127] Update 4.mdx

---
 chapters/it/chapter1/4.mdx | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index 1ce71bc24..3ed5d5752 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -23,19 +23,19 @@ L'[architettura Transformer](https://arxiv.org/abs/1706.03762) è stata introdot
 
 - **ottobre 2019**: [BART](https://arxiv.org/abs/1910.13461) e [T5](https://arxiv.org/abs/1910.10683), due grossi modelli pre-addestrati che utilizzano la stessa architettura del modello Transformer originale (nonché i primi a farlo)
 
-- **maggio 2020**, [GPT-3](https://arxiv.org/abs/2005.14165), una versione ancora più grande di GPT-2, con buone prestazioni in vari compiti e nessun bisogno di fine-tuning (il cosiddetto _zero-shot learning_)
+- **maggio 2020**, [GPT-3](https://arxiv.org/abs/2005.14165), una versione ancora più ampia di GPT-2, con buone prestazioni in vari compiti e nessun bisogno di fine-tuning (il cosiddetto _zero-shot learning_)
 
-This list is far from comprehensive, and is just meant to highlight a few of the different kinds of Transformer models. In genere, è possibile raggrupparli in tre categorie:
+La lista è lontana dall'essere esaustiva ed è volta solo a mettere in evidenza alcuni dei diversi tipi di modelli Transformer. In genere, questi possono essere raggruppati in tre categorie:
 
 - Modelli in stile GPT (detti anche modelli Transformer _auto-regressive_)
 - Modelli in stile BERT (detti anche modelli Transformer _auto-encoding_) 
 - Modelli in stile BART/T5 (detti anche modelli Transformer _sequence-to-sequence_)
 
-We will dive into these families in more depth later on.
+Studieremo queste famiglie più nel dettaglio in seguito.
 
-## Transformers are language models
+## I Transformer sono modelli linguistici
 
-All the Transformer models mentioned above (GPT, BERT, BART, T5, etc.) have been trained as *language models*. This means they have been trained on large amounts of raw text in a self-supervised fashion. Self-supervised learning is a type of training in which the objective is automatically computed from the inputs of the model. That means that humans are not needed to label the data!
+Tutti i modelli Transformer menzionati qui sopra (GPT, BERT, BART, T5, ecc.) sono stati addestrati come language models (*modelli linguistici*). This means they have been trained on large amounts of raw text in a self-supervised fashion. Self-supervised learning is a type of training in which the objective is automatically computed from the inputs of the model. That means that humans are not needed to label the data!
 
 This type of model develops a statistical understanding of the language it has been trained on, but it's not very useful for specific practical tasks. Because of this, the general pretrained model then goes through a process called *transfer learning*. During this process, the model is fine-tuned in a supervised way -- that is, using human-annotated labels -- on a given task.
 

From fe88ff1c8babdbe1988a49a8bf0c5521983cfd2e Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 30 May 2022 21:20:30 +0100
Subject: [PATCH 106/127] Creates 6.mdx

---
 chapters/it/chapter1/6.mdx | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 chapters/it/chapter1/6.mdx

diff --git a/chapters/it/chapter1/6.mdx b/chapters/it/chapter1/6.mdx
new file mode 100644
index 000000000..d86cea9e5
--- /dev/null
+++ b/chapters/it/chapter1/6.mdx
@@ -0,0 +1,16 @@
+# Decoder models
+
+<Youtube id="d_ixlCubqQw" />
+
+Decoder models use only the decoder of a Transformer model. At each stage, for a given word the attention layers can only access the words positioned before it in the sentence. These models are often called *auto-regressive models*.
+
+The pretraining of decoder models usually revolves around predicting the next word in the sentence.
+
+These models are best suited for tasks involving text generation.
+
+Representatives of this family of models include:
+
+- [CTRL](https://huggingface.co/transformers/model_doc/ctrl.html)
+- [GPT](https://huggingface.co/transformers/model_doc/gpt.html)
+- [GPT-2](https://huggingface.co/transformers/model_doc/gpt2.html)
+- [Transformer XL](https://huggingface.co/transformers/model_doc/transfo-xl.html)

From d54ffe13290383e82a2ae4800644286280c5c7b4 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 30 May 2022 21:52:08 +0100
Subject: [PATCH 107/127] Adds ch 1/7

---
 chapters/it/_toctree.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/chapters/it/_toctree.yml b/chapters/it/_toctree.yml
index 87ed3a6d2..4ced27d8b 100644
--- a/chapters/it/_toctree.yml
+++ b/chapters/it/_toctree.yml
@@ -15,3 +15,5 @@
     title: Come funzionano i Transformer?
   - local: chapter1/5
     title: Modelli encoder
+  - local: chapter1/6
+    title: Modelli decoder

From ab1da13ade1878b888d8318c21ca8970b7a629f5 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 30 May 2022 22:04:17 +0100
Subject: [PATCH 108/127] Update 6.mdx

---
 chapters/it/chapter1/6.mdx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/chapters/it/chapter1/6.mdx b/chapters/it/chapter1/6.mdx
index d86cea9e5..5b058aa5b 100644
--- a/chapters/it/chapter1/6.mdx
+++ b/chapters/it/chapter1/6.mdx
@@ -1,14 +1,14 @@
-# Decoder models
+# Modelli decoder
 
 <Youtube id="d_ixlCubqQw" />
 
-Decoder models use only the decoder of a Transformer model. At each stage, for a given word the attention layers can only access the words positioned before it in the sentence. These models are often called *auto-regressive models*.
+I modelli decoder utilizzano solo il decoder di un modello Transformer. Ad ogni passaggio e per una data parola, i layers di attenzione hanno accesso solo alle parole che la precedono nella frase. These models are often called *auto-regressive models*.
 
 The pretraining of decoder models usually revolves around predicting the next word in the sentence.
 
-These models are best suited for tasks involving text generation.
+Questi modelli sono particolarmente adattu a compiti che hanno a che fare con la generazione testuale.
 
-Representatives of this family of models include:
+Alcuni rappresentanti di questa famiglia includono:
 
 - [CTRL](https://huggingface.co/transformers/model_doc/ctrl.html)
 - [GPT](https://huggingface.co/transformers/model_doc/gpt.html)

From 7708eaf37387522a14570bcffb5ecb81d98ba466 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 30 May 2022 22:06:51 +0100
Subject: [PATCH 109/127] Last update

---
 chapters/it/chapter1/6.mdx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/chapters/it/chapter1/6.mdx b/chapters/it/chapter1/6.mdx
index 5b058aa5b..a04e5504a 100644
--- a/chapters/it/chapter1/6.mdx
+++ b/chapters/it/chapter1/6.mdx
@@ -2,11 +2,11 @@
 
 <Youtube id="d_ixlCubqQw" />
 
-I modelli decoder utilizzano solo il decoder di un modello Transformer. Ad ogni passaggio e per una data parola, i layers di attenzione hanno accesso solo alle parole che la precedono nella frase. These models are often called *auto-regressive models*.
+I modelli decoder utilizzano solo il decoder di un modello Transformer. Ad ogni passaggio e per una data parola, i layer di attenzione hanno accesso solo alle parole che la precedono nella frase. Questi modelli sono spesso detti *auto-regressive models*.
 
-The pretraining of decoder models usually revolves around predicting the next word in the sentence.
+Il pre-addestramento dei modelli decoder ha spesso a che fare con la previsione della parola successiva in un contesto frasale.
 
-Questi modelli sono particolarmente adattu a compiti che hanno a che fare con la generazione testuale.
+Questi modelli sono particolarmente adatti a compiti di generazione testuale.
 
 Alcuni rappresentanti di questa famiglia includono:
 

From e9900e6fc6c36ab6f2526db12b7a904f551db9dc Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 30 May 2022 22:22:49 +0100
Subject: [PATCH 110/127] Update 4.mdx

---
 chapters/it/chapter1/4.mdx | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index 3ed5d5752..5b80c9e8f 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -35,27 +35,27 @@ Studieremo queste famiglie più nel dettaglio in seguito.
 
 ## I Transformer sono modelli linguistici
 
-Tutti i modelli Transformer menzionati qui sopra (GPT, BERT, BART, T5, ecc.) sono stati addestrati come language models (*modelli linguistici*). This means they have been trained on large amounts of raw text in a self-supervised fashion. Self-supervised learning is a type of training in which the objective is automatically computed from the inputs of the model. That means that humans are not needed to label the data!
+Tutti i modelli Transformer menzionati qui sopra (GPT, BERT, BART, T5, ecc.) sono stati addestrati come modelli linguistici (*language models*). This means they have been trained on large amounts of raw text in a self-supervised fashion. Self-supervised learning is a type of training in which the objective is automatically computed from the inputs of the model. Ciò significa che non è richiesto intervento umano per etichettare i dati!
 
 This type of model develops a statistical understanding of the language it has been trained on, but it's not very useful for specific practical tasks. Because of this, the general pretrained model then goes through a process called *transfer learning*. During this process, the model is fine-tuned in a supervised way -- that is, using human-annotated labels -- on a given task.
 
-An example of a task is predicting the next word in a sentence having read the *n* previous words. This is called *causal language modeling* because the output depends on the past and present inputs, but not the future ones.
+Un esempio di compito è la previsione della parola seguente in una frase di cui sono state lette *n* parole precedenti. Quest'operazione si chiama *causal language modeling* perché il suo output dipende gli input presenti e passati, ma non da quelli futuri.
 
 <div class="flex justify-center">
 <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/causal_modeling.svg" alt="Example of causal language modeling in which the next word from a sentence is predicted.">
 <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/causal_modeling-dark.svg" alt="Example of causal language modeling in which the next word from a sentence is predicted.">
 </div>
 
-Another example is *masked language modeling*, in which the model predicts a masked word in the sentence.
+Un altro esempio è il *masked language modeling*, in cui il modello prevede una parola occultata della frase.
 
 <div class="flex justify-center">
 <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/masked_modeling.svg" alt="Example of masked language modeling in which a masked word from a sentence is predicted.">
 <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/masked_modeling-dark.svg" alt="Example of masked language modeling in which a masked word from a sentence is predicted.">
 </div>
 
-## Transformers are big models
+## I Transformer sono modelli enormi
 
-Apart from a few outliers (like DistilBERT), the general strategy to achieve better performance is by increasing the models' sizes as well as the amount of data they are pretrained on.
+A parte per alcune eccezioni (come DistilBERT), la strategia generale per ottenere performance migliori consiste nell'aumentare la taglia dei modelli, nonché la quantità di dati utilizzati per il pre-addestramento.
 
 <div class="flex justify-center">
 <img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/model_parameters.png" alt="Number of parameters of recent Transformers models" width="90%">

From 246b43aac6121d0dc149cf1c861130e9eb4fe775 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 30 May 2022 22:27:03 +0100
Subject: [PATCH 111/127] Update 4.mdx

---
 chapters/it/chapter1/4.mdx | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index 5b80c9e8f..9cce661a9 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -107,13 +107,13 @@ Fine-tuning a model therefore has lower time, data, financial, and environmental
 
 This process will also achieve better results than training from scratch (unless you have lots of data), which is why you should always try to leverage a pretrained model -- one as close as possible to the task you have at hand -- and fine-tune it.
 
-## General architecture
+## Architettura generale
 
 In this section, we'll go over the general architecture of the Transformer model. Don't worry if you don't understand some of the concepts; there are detailed sections later covering each of the components.
 
 <Youtube id="H39Z_720T5s" />
 
-## Introduction
+## Introduzione
 
 The model is primarily composed of two blocks:
 
@@ -133,7 +133,7 @@ Each of these parts can be used independently, depending on the task:
 
 We will dive into those architectures independently in later sections.
 
-## Attention layers
+## Layers di attenzione
 
 A key feature of Transformer models is that they are built with special layers called *attention layers*. In fact, the title of the paper introducing the Transformer architecture was ["Attention Is All You Need"](https://arxiv.org/abs/1706.03762)! We will explore the details of attention layers later in the course; for now, all you need to know is that this layer will tell the model to pay specific attention to certain words in the sentence you passed it (and more or less ignore the others) when dealing with the representation of each word.
 
@@ -143,7 +143,7 @@ The same concept applies to any task associated with natural language: a word by
 
 Now that you have an idea of what attention layers are all about, let's take a closer look at the Transformer architecture.
 
-## The original architecture
+## L'architettura originale
 
 The Transformer architecture was originally designed for translation. During training, the encoder receives inputs (sentences) in a certain language, while the decoder receives the same sentences in the desired target language. In the encoder, the attention layers can use all the words in a sentence (since, as we just saw, the translation of a given word can be dependent on what is after as well as before it in the sentence). The decoder, however, works sequentially and can only pay attention to the words in the sentence that it has already translated (so, only the words before the word currently being generated). For example, when we have predicted the first three words of the translated target, we give them to the decoder  which then uses all the inputs of the encoder to try to predict the fourth word.
 
@@ -160,12 +160,12 @@ Note that the first attention layer in a decoder block pays attention to all (pa
 
 The *attention mask* can also be used in the encoder/decoder to prevent the model from paying attention to some special words -- for instance, the special padding word used to make all the inputs the same length when batching together sentences.
 
-##  Architectures vs. checkpoints
+##  Architetture vs. checkpoint
 
 As we dive into Transformer models in this course, you'll see mentions of *architectures* and *checkpoints* as well as *models*. These terms all have slightly different meanings: 
 
-* **Architecture**: This is the skeleton of the model -- the definition of each layer and each operation that happens within the model. 
-* **Checkpoints**: These are the weights that will be loaded in a given architecture.
-* **Model**: This is an umbrella term that isn't as precise as "architecture" or "checkpoint": it can mean both. This course will specify *architecture* or *checkpoint* when it matters to reduce ambiguity.
+* **Architettura**: This is the skeleton of the model -- the definition of each layer and each operation that happens within the model. 
+* **Checkpoint**: These are the weights that will be loaded in a given architecture.
+* **Modello**: This is an umbrella term that isn't as precise as "architecture" or "checkpoint": it can mean both. This course will specify *architecture* or *checkpoint* when it matters to reduce ambiguity.
 
 For example, BERT is an architecture while `bert-base-cased`, a set of weights trained by the Google team for the first release of BERT, is a checkpoint. However, one can say "the BERT model" and "the `bert-base-cased` model."

From a3f6e7f8f9bcc557d3574d56459c79d844f2017d Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 30 May 2022 22:44:33 +0100
Subject: [PATCH 112/127] Update 4.mdx

---
 chapters/it/chapter1/4.mdx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index 9cce661a9..de21cc0b2 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -35,9 +35,9 @@ Studieremo queste famiglie più nel dettaglio in seguito.
 
 ## I Transformer sono modelli linguistici
 
-Tutti i modelli Transformer menzionati qui sopra (GPT, BERT, BART, T5, ecc.) sono stati addestrati come modelli linguistici (*language models*). This means they have been trained on large amounts of raw text in a self-supervised fashion. Self-supervised learning is a type of training in which the objective is automatically computed from the inputs of the model. Ciò significa che non è richiesto intervento umano per etichettare i dati!
+Tutti i modelli Transformer menzionati qui sopra (GPT, BERT, BART, T5, ecc.) sono stati addestrati come modelli linguistici (*language models*). Ciò significa che sono stati addestrati su grandi quantità di testo grezzo in stile auto-supervisionato (*self-supervised*). L'apprendimento auto-supervisionato è un tipo di apprendimento il cui obbiettivo viene computato direttamente dagli input del modello. Ciò significa che non è richiesto intervento umano per etichettare i dati!
 
-This type of model develops a statistical understanding of the language it has been trained on, but it's not very useful for specific practical tasks. Because of this, the general pretrained model then goes through a process called *transfer learning*. During this process, the model is fine-tuned in a supervised way -- that is, using human-annotated labels -- on a given task.
+Un modello di questo tipo sviluppa una comprensione statistica della lingua sulla quale è stato addestrato, ma non è molto utile in compiti pratici precisi. Per questa ragione, il modello pre-addestrato generale viene in seguito sottoposto a un processo detto transfer learning (*apprendimento per trasferimento*). Durante questo processo, il modello viene affinato per un determinato compito in maniera supervisionata (ossia utilizzando etichette generate da umani).
 
 Un esempio di compito è la previsione della parola seguente in una frase di cui sono state lette *n* parole precedenti. Quest'operazione si chiama *causal language modeling* perché il suo output dipende gli input presenti e passati, ma non da quelli futuri.
 

From b630db4b757370513a23a53d420b925b71bd7498 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 30 May 2022 22:48:58 +0100
Subject: [PATCH 113/127] Update 4.mdx

---
 chapters/it/chapter1/4.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index de21cc0b2..7c5529243 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -61,7 +61,7 @@ A parte per alcune eccezioni (come DistilBERT), la strategia generale per ottene
 <img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/model_parameters.png" alt="Number of parameters of recent Transformers models" width="90%">
 </div>
 
-Unfortunately, training a model, especially a large one, requires a large amount of data. This becomes very costly in terms of time and compute resources. It even translates to environmental impact, as can be seen in the following graph.
+Sfortunatamente, l'addestramento di un modello, e specialmente di un modello grosso, richiede una grande quantità di dati. Ciò si rivela molto costoso in termini di tempo, risorse informatiche e impatto ambientale, come mostrano i grafici qui sotto.
 
 <div class="flex justify-center">
 <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/carbon_footprint.svg" alt="The carbon footprint of a large language model.">

From b9713ca81ab71385894453a5e58fdb289e60a72b Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Mon, 30 May 2022 23:04:43 +0100
Subject: [PATCH 114/127] Update 4.mdx

---
 chapters/it/chapter1/4.mdx | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index 7c5529243..390f4a08e 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -70,27 +70,27 @@ Sfortunatamente, l'addestramento di un modello, e specialmente di un modello gro
 
 <Youtube id="ftWlj4FBHTg"/>
 
-And this is showing a project for a (very big) model led by a team consciously trying to reduce the environmental impact of pretraining. The footprint of running lots of trials to get the best hyperparameters would be even higher.
+E questi dati sono di un progetto per un modello (molto grande) condotto da un team che provava consciamente a ridurre l'impatto ambientale del pre-addestramento! L'impronta di trials volti a ottenere i migliori iperparametri possibili sarebbe ancora più importante.
 
-Imagine if each time a research team, a student organization, or a company wanted to train a model, it did so from scratch. This would lead to huge, unnecessary global costs!
+Immagina cosa succederebbe se ogni volta che un gruppo di ricerca, un'organizzazione studentesca o un'azienda volesse addestrare un modello lo facesse da zero. I costi globali sarebbero inutilmente enormi!
 
-This is why sharing language models is paramount: sharing the trained weights and building on top of already trained weights reduces the overall compute cost and carbon footprint of the community.
+Questo è il motivo per cui la condivisione di modelli linguistici è fondamentale: lavorare a partire da modelli già addestrati riduce i costi informatici complessivi e l'impatto ambientale della comunità.
 
 
 ## Transfer Learning
 
 <Youtube id="BqqfQnyjmgg" />
 
-*Pretraining* is the act of training a model from scratch: the weights are randomly initialized, and the training starts without any prior knowledge.
+Il pre-addestramento è l'atto di addestrare un modello da zero: i pesi sono inizializzati in maniera casuale, e l'addestramento inizia senza alcuna conoscenza pregressa.
 
 <div class="flex justify-center">
 <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/pretraining.svg" alt="The pretraining of a language model is costly in both time and money.">
 <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/pretraining-dark.svg" alt="The pretraining of a language model is costly in both time and money.">
 </div>
 
-This pretraining is usually done on very large amounts of data. Therefore, it requires a very large corpus of data, and training can take up to several weeks.
+Questo pre-addestramento è solitamente fatto su enormi quantità di dati. Di conseguenza, l'addestramento richiede un corpus di dati molto ampio e può prendere diverse settimane.
 
-*Fine-tuning*, on the other hand, is the training done **after** a model has been pretrained. To perform fine-tuning, you first acquire a pretrained language model, then perform additional training with a dataset specific to your task. Wait -- why not simply train directly for the final task? There are a couple of reasons:
+L'affinamento (*fine-tuning*), al contrarion, è un addestramento che ha luogo **dopo** che il modello è stato pre-addestrato. To perform fine-tuning, you first acquire a pretrained language model, then perform additional training with a dataset specific to your task. Wait -- why not simply train directly for the final task? There are a couple of reasons:
 
 *  The pretrained model was already trained on a dataset that has some similarities with the fine-tuning dataset. The fine-tuning process is thus able to take advantage of knowledge acquired by the initial model during pretraining (for instance, with NLP problems, the pretrained model will have some kind of statistical understanding of the language you are using for your task). 
 *  Since the pretrained model was already trained on lots of data, the fine-tuning requires way less data to get decent results.

From 771f2a7d7b38e890b640929367a7914c3ab020ea Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 31 May 2022 22:38:42 +0100
Subject: [PATCH 115/127] Translates last paragraph

---
 chapters/it/chapter1/4.mdx | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index 390f4a08e..bd79165c1 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -162,10 +162,10 @@ The *attention mask* can also be used in the encoder/decoder to prevent the mode
 
 ##  Architetture vs. checkpoint
 
-As we dive into Transformer models in this course, you'll see mentions of *architectures* and *checkpoints* as well as *models*. These terms all have slightly different meanings: 
+Durante questo viaggio nel mondo dei modelli Transformer, incontrerai menzioni di *architetture* e *checkpoint*, nonché di *modelli*. Questi termini hanno significati leggermente diversi: 
 
-* **Architettura**: This is the skeleton of the model -- the definition of each layer and each operation that happens within the model. 
-* **Checkpoint**: These are the weights that will be loaded in a given architecture.
-* **Modello**: This is an umbrella term that isn't as precise as "architecture" or "checkpoint": it can mean both. This course will specify *architecture* or *checkpoint* when it matters to reduce ambiguity.
+* **Architettura**: Lo scheletro del modello, ossia la definizione di ogni livello e operazione che compare nel modello. 
+* **Checkpoint**: I pesi che verranno caricati in una determinata architettura.
+* **Modello**: Un termine generico meno preciso di "architettura" o "checkpoint", in quanto può significare entrambi. In questo corso faremo la distinzione tra *architettura* e *checkpoint* quando sarà necessario ridurre le ambiguità.
 
-For example, BERT is an architecture while `bert-base-cased`, a set of weights trained by the Google team for the first release of BERT, is a checkpoint. However, one can say "the BERT model" and "the `bert-base-cased` model."
+Ad esempio, BERT è un'architettura, mentre `bert-base-cased`, un set di pesi (*weights*) addestrati dal team di Google per la prima versione di BERT, è un checkpoint. Ciononostante, è possibile dire "il modello BERT" e "il modello `bert-base-cased`."

From 59c7f4ee07b242484d6993b05ac47f0b3f73ef4a Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 31 May 2022 23:01:30 +0100
Subject: [PATCH 116/127] Update 4.mdx

---
 chapters/it/chapter1/4.mdx | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index bd79165c1..254a716fa 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -109,31 +109,31 @@ This process will also achieve better results than training from scratch (unless
 
 ## Architettura generale
 
-In this section, we'll go over the general architecture of the Transformer model. Don't worry if you don't understand some of the concepts; there are detailed sections later covering each of the components.
+In questa sezione, vedremo l'architettura generale del modello Transformer. Non preoccuparti se non capisci tutti i concetti: più avanti, troverai sezioni dettagliate per ogni componente.
 
 <Youtube id="H39Z_720T5s" />
 
 ## Introduzione
 
-The model is primarily composed of two blocks:
+Il modello si compone principalmente di due blocchi:
 
-* **Encoder (left)**: The encoder receives an input and builds a representation of it (its features). This means that the model is optimized to acquire understanding from the input.
-* **Decoder (right)**: The decoder uses the encoder's representation (features) along with other inputs to generate a target sequence. This means that the model is optimized for generating outputs.
+* **Encoder (sinistra)**: L'encoder riceve un input e ne costruisce una rappresentazione, le features. Ciò significa che il modello è ottimizzato per la comprensione dell'input.
+* **Decoder (destra)**: Il decoder utilizza la rappresentazione dell'encoder (le features) assieme ad ulteriori input per generare la sequenza target. Ciò significa che il modello è ottimizzato per la generazione di output.
 
 <div class="flex justify-center">
 <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers_blocks.svg" alt="Architecture of a Transformers models">
 <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers_blocks-dark.svg" alt="Architecture of a Transformers models">
 </div>
 
-Each of these parts can be used independently, depending on the task: 
+Ognuna di queste parti può essere utilizzata indipendentemente, in base al compito: 
 
-* **Encoder-only models**: Good for tasks that require understanding of the input, such as sentence classification and named entity recognition.
-* **Decoder-only models**: Good for generative tasks such as text generation.
-* **Encoder-decoder models** or **sequence-to-sequence models**: Good for generative tasks that require an input, such as translation or summarization.
+* **Modelli Encoder-only**: Ottimi per compiti che richiedono una comprensione dell'input, come la classificazione frasale e il riconoscimento delle entità nominate.
+* **Modelli Decoder-only**: Ottimi per compiti generativi come la generazione testuale.
+* **Modelli Encoder-decoder** o **modelli sequence-to-sequence**: Ottimi per compiti generativi che richiedono un input, come la traduzione o il riassunto.
 
-We will dive into those architectures independently in later sections.
+Analizzeremo ciascuna di queste architetture indipendentemente più tardi nel corso.
 
-## Layers di attenzione
+## Livelli di attenzione
 
 A key feature of Transformer models is that they are built with special layers called *attention layers*. In fact, the title of the paper introducing the Transformer architecture was ["Attention Is All You Need"](https://arxiv.org/abs/1706.03762)! We will explore the details of attention layers later in the course; for now, all you need to know is that this layer will tell the model to pay specific attention to certain words in the sentence you passed it (and more or less ignore the others) when dealing with the representation of each word.
 

From 595b4da7b82781c985ccacf06d89b89568f8bf80 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 31 May 2022 23:01:53 +0100
Subject: [PATCH 117/127] Update 4.mdx

---
 chapters/it/chapter1/4.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index 254a716fa..fc8840bdf 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -1,6 +1,6 @@
 # Come funzionano i Transformer?
 
-In questa sezione, vedremo in maniera avanzata l'architettura dei modelli Transformer.
+In questa sezione, vedremo in maniera approfondita l'architettura dei modelli Transformer.
 
 ## Un po' di storia dei Transformer
 

From 31aac9e0b15d2e7eb3707d4072b33ae9c1302db8 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 14 Jun 2022 13:58:03 +0100
Subject: [PATCH 118/127] Update 4.mdx

---
 chapters/it/chapter1/4.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index fc8840bdf..174d28287 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -90,7 +90,7 @@ Il pre-addestramento è l'atto di addestrare un modello da zero: i pesi sono ini
 
 Questo pre-addestramento è solitamente fatto su enormi quantità di dati. Di conseguenza, l'addestramento richiede un corpus di dati molto ampio e può prendere diverse settimane.
 
-L'affinamento (*fine-tuning*), al contrarion, è un addestramento che ha luogo **dopo** che il modello è stato pre-addestrato. To perform fine-tuning, you first acquire a pretrained language model, then perform additional training with a dataset specific to your task. Wait -- why not simply train directly for the final task? There are a couple of reasons:
+L'affinamento (*fine-tuning*), al contrarion, è un addestramento che ha luogo **dopo** che il modello è stato pre-addestrato. Per poter performare un fine-tuning, è necessario acquisire un modello linguistico pre-addestrato e addestrarlo ulteriormente con una base dati adatta al compito in questione. Ma perché non addestrare direttamente al compito finale? Esistono alcune ragioni:
 
 *  The pretrained model was already trained on a dataset that has some similarities with the fine-tuning dataset. The fine-tuning process is thus able to take advantage of knowledge acquired by the initial model during pretraining (for instance, with NLP problems, the pretrained model will have some kind of statistical understanding of the language you are using for your task). 
 *  Since the pretrained model was already trained on lots of data, the fine-tuning requires way less data to get decent results.

From a03f334d007a170dfa119e6baf9da702794752fd Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 14 Jun 2022 14:22:22 +0100
Subject: [PATCH 119/127] Update 4.mdx

---
 chapters/it/chapter1/4.mdx | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index 174d28287..386204a43 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -92,20 +92,20 @@ Questo pre-addestramento è solitamente fatto su enormi quantità di dati. Di co
 
 L'affinamento (*fine-tuning*), al contrarion, è un addestramento che ha luogo **dopo** che il modello è stato pre-addestrato. Per poter performare un fine-tuning, è necessario acquisire un modello linguistico pre-addestrato e addestrarlo ulteriormente con una base dati adatta al compito in questione. Ma perché non addestrare direttamente al compito finale? Esistono alcune ragioni:
 
-*  The pretrained model was already trained on a dataset that has some similarities with the fine-tuning dataset. The fine-tuning process is thus able to take advantage of knowledge acquired by the initial model during pretraining (for instance, with NLP problems, the pretrained model will have some kind of statistical understanding of the language you are using for your task). 
-*  Since the pretrained model was already trained on lots of data, the fine-tuning requires way less data to get decent results.
-*  For the same reason, the amount of time and resources needed to get good results are much lower.
+*  Il modello pre-addestrato è già addestrato su basi dati che contengono similarità con la base dati usata per il fine-tuning. Il processo di fine-tuning riesce quindi a beneficiare della conoscenza acquisita dal modello iniziale durante il pre-addestramento (ad esempio, nei problemi di NLP, il modello pre-addestrato avrà già conoscenze statistiche della lingua utilizzata nel compito).
+*  Siccome il modello pre-addestrato è stato addestrato usando moltissimi dati, il fine-tuning richiede molti meno dati per ottenere buoni risultati.
+*  Per la stessa ragione, occorrono molto meno tempo e risorse per ottenere buoni risultati.
 
-For example, one could leverage a pretrained model trained on the English language and then fine-tune it on an arXiv corpus, resulting in a science/research-based model. The fine-tuning will only require a limited amount of data: the knowledge the pretrained model has acquired is "transferred," hence the term *transfer learning*.
+Ad esempio, è possibile approfittare di un modello pre-addestrato per la lingua inglese e poi affinarlo usando un corpus arXiv, ottenendo così un modello specifico per la scienza/ricerca. L'affinamento non richiederà che una quantità limitata di dati: le conoscenze acquisite dal modello pre-addestrato sono "trasferite", come riflette il nome *transfer learning*.
 
 <div class="flex justify-center">
 <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/finetuning.svg" alt="The fine-tuning of a language model is cheaper than pretraining in both time and money.">
 <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/finetuning-dark.svg" alt="The fine-tuning of a language model is cheaper than pretraining in both time and money.">
 </div>
 
-Fine-tuning a model therefore has lower time, data, financial, and environmental costs. It is also quicker and easier to iterate over different fine-tuning schemes, as the training is less constraining than a full pretraining.
+Il fine-tuning di un modello ha quindi costi ridotti in termini di tempo, dati, finanze e impatto ambientale. Iterare su diversi schemi di fine-tuning è anche più rapido e semplice, in quanto l'addestramento è meno restrittivo di un pre-addestramento completo.
 
-This process will also achieve better results than training from scratch (unless you have lots of data), which is why you should always try to leverage a pretrained model -- one as close as possible to the task you have at hand -- and fine-tune it.
+Questo processo permette anche di ottenere risultati migliori di un addestramento da zero (a meno di non essere in possesso di moltissimi dati), motivo per cui bisognerebbe sempre partire da un modello pre-addestrato (quanto possibile compatibile con il compito da eseguire) e affinarlo.
 
 ## Architettura generale
 

From faa592274cb36a7a9858f28972b3d61d6590d861 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 14 Jun 2022 14:51:32 +0100
Subject: [PATCH 120/127] Update 4.mdx

---
 chapters/it/chapter1/4.mdx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index 386204a43..ca35ae5aa 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -133,15 +133,15 @@ Ognuna di queste parti può essere utilizzata indipendentemente, in base al comp
 
 Analizzeremo ciascuna di queste architetture indipendentemente più tardi nel corso.
 
-## Livelli di attenzione
+## Attention layers
 
-A key feature of Transformer models is that they are built with special layers called *attention layers*. In fact, the title of the paper introducing the Transformer architecture was ["Attention Is All You Need"](https://arxiv.org/abs/1706.03762)! We will explore the details of attention layers later in the course; for now, all you need to know is that this layer will tell the model to pay specific attention to certain words in the sentence you passed it (and more or less ignore the others) when dealing with the representation of each word.
+Una caratteristica chiave dei modelli Transformer è che sono basati su strati speciali detti *attention layers*. Non a caso, il titolo del paper che introdusse l'architettura Transformer era ["Attention Is All You Need"](https://arxiv.org/abs/1706.03762)! Esploreremo gli attention layer nel dettaglio più avanti in questo corso; per ora, tutto ciò che hai bisogno di sapere è che un layer dirà al modello di prestare particolare attenzione a certe parole nella frase input (ignorando praticamente le altre) quando si occupa della rappresentazione delle singole parole.
 
 To put this into context, consider the task of translating text from English to French. Given the input "You like this course", a translation model will need to also attend to the adjacent word "You" to get the proper translation for the word "like", because in French the verb "like" is conjugated differently depending on the subject. The rest of the sentence, however, is not useful for the translation of that word. In the same vein, when translating "this" the model will also need to pay attention to the word "course", because "this" translates differently depending on whether the associated noun is masculine or feminine. Again, the other words in the sentence will not matter for the translation of "this". With more complex sentences (and more complex grammar rules), the model would need to pay special attention to words that might appear farther away in the sentence to properly translate each word.
 
-The same concept applies to any task associated with natural language: a word by itself has a meaning, but that meaning is deeply affected by the context, which can be any other word (or words) before or after the word being studied.
+Lo stesso concetto si applica a qualsiasi compito che ha a che fare con il linguaggio naturale: una parola ha un senso a sé stante, ma tale senso è profondamente influenzato dal contesto, il quale è costituito da una qualsiasi parola (o parole) che precede o segue la parola sotto osservazione.
 
-Now that you have an idea of what attention layers are all about, let's take a closer look at the Transformer architecture.
+Ora che sai cosa sono gli attention layer, guardiamo un po' più nel dettaglio all'architettura Transformer.
 
 ## L'architettura originale
 

From 5924bd26ebd16578bd152d0f1d9579780c01ff59 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 14 Jun 2022 15:06:51 +0100
Subject: [PATCH 121/127] Update 4.mdx

---
 chapters/it/chapter1/4.mdx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index ca35ae5aa..baee8d350 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -137,7 +137,7 @@ Analizzeremo ciascuna di queste architetture indipendentemente più tardi nel co
 
 Una caratteristica chiave dei modelli Transformer è che sono basati su strati speciali detti *attention layers*. Non a caso, il titolo del paper che introdusse l'architettura Transformer era ["Attention Is All You Need"](https://arxiv.org/abs/1706.03762)! Esploreremo gli attention layer nel dettaglio più avanti in questo corso; per ora, tutto ciò che hai bisogno di sapere è che un layer dirà al modello di prestare particolare attenzione a certe parole nella frase input (ignorando praticamente le altre) quando si occupa della rappresentazione delle singole parole.
 
-To put this into context, consider the task of translating text from English to French. Given the input "You like this course", a translation model will need to also attend to the adjacent word "You" to get the proper translation for the word "like", because in French the verb "like" is conjugated differently depending on the subject. The rest of the sentence, however, is not useful for the translation of that word. In the same vein, when translating "this" the model will also need to pay attention to the word "course", because "this" translates differently depending on whether the associated noun is masculine or feminine. Again, the other words in the sentence will not matter for the translation of "this". With more complex sentences (and more complex grammar rules), the model would need to pay special attention to words that might appear farther away in the sentence to properly translate each word.
+Come esempio concreto, pensa ad un compito di traduzione testuale dall'inglese al francese. Dato l'input "You like this course", un modello di traduzione dovrà fare riferimento alla parola adiacente "You" per fornire la traduzione corretta della parola "like", perché in francese la coniugazione del verbo "like" cambia in base al soggetto. Diversamente, il resto della frase non è utile alla traduzione di quella precisa parola. In maniera simile, durante la traduzione di "this" il modello dovrà prestare attenzione alla parola "course", in quanto "this" ha traduzioni diverse se associato con nomi femminili o maschili. Di nuovo, il resto delle parole della frase non contribuisce alla corretta traduzione di "this". Con frasi più complesse (e regole grammaticali più complesse), il modello potrebbe aver bisogno di prestare particolare attenzione a parole ben più lontane nella frase per tradurre correttamente ogni parola.
 
 Lo stesso concetto si applica a qualsiasi compito che ha a che fare con il linguaggio naturale: una parola ha un senso a sé stante, ma tale senso è profondamente influenzato dal contesto, il quale è costituito da una qualsiasi parola (o parole) che precede o segue la parola sotto osservazione.
 
@@ -149,7 +149,7 @@ The Transformer architecture was originally designed for translation. During tra
 
 To speed things up during training (when the model has access to target sentences), the decoder is fed the whole target, but it is not allowed to use future words (if it had access to the word at position 2 when trying to predict the word at position 2, the problem would not be very hard!). For instance, when trying to predict the fourth word, the attention layer will only have access to the words in positions 1 to 3.
 
-The original Transformer architecture looked like this, with the encoder on the left and the decoder on the right:
+L'architettura Transformer originale aveva la struttura qui sotto, con l'encoder a sinistra e il decoder a destra:
 
 <div class="flex justify-center">
 <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers.svg" alt="Architecture of a Transformers models">

From b0df42c432d3c4d86a43f4da1db8810ef0eaf078 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 14 Jun 2022 15:17:03 +0100
Subject: [PATCH 122/127] Update 4.mdx

---
 chapters/it/chapter1/4.mdx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index baee8d350..46214b6ad 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -145,9 +145,9 @@ Ora che sai cosa sono gli attention layer, guardiamo un po' più nel dettaglio a
 
 ## L'architettura originale
 
-The Transformer architecture was originally designed for translation. During training, the encoder receives inputs (sentences) in a certain language, while the decoder receives the same sentences in the desired target language. In the encoder, the attention layers can use all the words in a sentence (since, as we just saw, the translation of a given word can be dependent on what is after as well as before it in the sentence). The decoder, however, works sequentially and can only pay attention to the words in the sentence that it has already translated (so, only the words before the word currently being generated). For example, when we have predicted the first three words of the translated target, we give them to the decoder  which then uses all the inputs of the encoder to try to predict the fourth word.
+All'origine, l'architettura Transformer fu creata per la traduzione. In fase di addestramento, l'encoder riceve degli input (frasi) in una certa lingua, mentre il decoder riceve le stesse frasi nella lingua target d'elezione. Nell'encoder, gli attention layer sono in grado di utilizzare qualsiasi parola in una data frase (dato che, come abbiamo appena visto, la traduzione di una determinata parola può dipendere da ciò che la precede o segue nella frase). Diversamente, il decoder procede in maniera sequenziale ed è capace di prestare attenzione solo alle parole della frase che ha già tradotto (ossia, solo le parole che precedono la parola che sta generando). Ad esempio, una volta predette le prime tre parole della frase target, le passiamo al decoder, che utilizza tutti gli input dell'encoder per provare a predire la quarta parola.
 
-To speed things up during training (when the model has access to target sentences), the decoder is fed the whole target, but it is not allowed to use future words (if it had access to the word at position 2 when trying to predict the word at position 2, the problem would not be very hard!). For instance, when trying to predict the fourth word, the attention layer will only have access to the words in positions 1 to 3.
+Per accelerare il processo di addestramento (quando il modello ha accesso alle frasi target), l'intero target viene fornito al decoder, che però non è in grado di accedere alle parole future (se avesse accesso alla parola in seconda posizione mentre cerca di predire la parola in seconda posizione, il problema cesserebbe di essere complesso). Ad esempio, mentre prova a predire la quarta parola, l'attention layer avrà accesso solo alle posizioni tra la prima e la terza.
 
 L'architettura Transformer originale aveva la struttura qui sotto, con l'encoder a sinistra e il decoder a destra:
 

From 51dea9e944ef931c9ff16ab941b5f1d39a82a095 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 14 Jun 2022 15:25:37 +0100
Subject: [PATCH 123/127] Update 4.mdx

---
 chapters/it/chapter1/4.mdx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index 46214b6ad..81c2f95e9 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -156,9 +156,9 @@ L'architettura Transformer originale aveva la struttura qui sotto, con l'encoder
 <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/transformers-dark.svg" alt="Architecture of a Transformers models">
 </div>
 
-Note that the first attention layer in a decoder block pays attention to all (past) inputs to the decoder, but the second attention layer uses the output of the encoder. It can thus access the whole input sentence to best predict the current word. This is very useful as different languages can have grammatical rules that put the words in different orders, or some context provided later in the sentence may be helpful to determine the best translation of a given word.
+Nota che il primo attention layer in un *decoder block* presta attenzione a tutti gli input (passati) al decoder, mentre il secondo attention layer utilizza l'output dell'encoder. Gli è perciò possibile avere accesso a tutta la frase input per meglio prevedere la parola corrente. Questa caratteristica è molto utile in quanto lingue diverse possono avere regole grammaticali diverse che piazzano le parole in ordini diversi, oppure perché il contesto che compare più tardi nella frase potrebbe essere utile nella determinazione della migliore traduzione di una data parola.
 
-The *attention mask* can also be used in the encoder/decoder to prevent the model from paying attention to some special words -- for instance, the special padding word used to make all the inputs the same length when batching together sentences.
+L'*attention mask* può essere utilizzato anche nell'encoder/decoder per evitare che il modello presti attenzione a certe parole speciali, come ad esempio parole riempitive utilizzate per rendere tutti gli input della stessa lunghezza.
 
 ##  Architetture vs. checkpoint
 

From 3738518ba946393ee8f53fd3cfd32785623df910 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 14 Jun 2022 15:40:35 +0100
Subject: [PATCH 124/127] Final version

---
 chapters/it/chapter1/4.mdx | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/chapters/it/chapter1/4.mdx b/chapters/it/chapter1/4.mdx
index 81c2f95e9..8aa824f14 100644
--- a/chapters/it/chapter1/4.mdx
+++ b/chapters/it/chapter1/4.mdx
@@ -19,13 +19,13 @@ L'[architettura Transformer](https://arxiv.org/abs/1706.03762) è stata introdot
 
 - **febbraio 2019**: [GPT-2](https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf), una versione (migliorata e ingrandita) di GPT che non fu distribuita immediatamente al pubblico a causa di preoccupazioni etiche
 
-- **ottobre 2019**: [DistilBERT](https://arxiv.org/abs/1910.01108), una versione distillata di BERT che è il 60% più rapida, il 40% più leggera in memoria, e ritiene tuttavia il 97% della performance di BERT
+- **ottobre 2019**: [DistilBERT](https://arxiv.org/abs/1910.01108), una versione distillata di BERT che è il 60% più rapida e il 40% più leggera in memoria, pur conservando il 97% della performance di BERT
 
 - **ottobre 2019**: [BART](https://arxiv.org/abs/1910.13461) e [T5](https://arxiv.org/abs/1910.10683), due grossi modelli pre-addestrati che utilizzano la stessa architettura del modello Transformer originale (nonché i primi a farlo)
 
 - **maggio 2020**, [GPT-3](https://arxiv.org/abs/2005.14165), una versione ancora più ampia di GPT-2, con buone prestazioni in vari compiti e nessun bisogno di fine-tuning (il cosiddetto _zero-shot learning_)
 
-La lista è lontana dall'essere esaustiva ed è volta solo a mettere in evidenza alcuni dei diversi tipi di modelli Transformer. In genere, questi possono essere raggruppati in tre categorie:
+La lista è tutto fuorché esaustiva ed è volta solo a mettere in evidenza alcuni dei diversi tipi di modelli Transformer. In genere, questi possono essere raggruppati in tre categorie:
 
 - Modelli in stile GPT (detti anche modelli Transformer _auto-regressive_)
 - Modelli in stile BERT (detti anche modelli Transformer _auto-encoding_) 
@@ -35,11 +35,11 @@ Studieremo queste famiglie più nel dettaglio in seguito.
 
 ## I Transformer sono modelli linguistici
 
-Tutti i modelli Transformer menzionati qui sopra (GPT, BERT, BART, T5, ecc.) sono stati addestrati come modelli linguistici (*language models*). Ciò significa che sono stati addestrati su grandi quantità di testo grezzo in stile auto-supervisionato (*self-supervising*). L'apprendimento auto-supervisionato è un tipo di apprendimento il cui obbiettivo viene computato direttamente dagli input del modello. Ciò significa che non è richiesto intervento umano per etichettare i dati!
+Tutti i modelli Transformer menzionati qui sopra (GPT, BERT, BART, T5, ecc.) sono stati addestrati come modelli linguistici (*language models*). Ciò significa che sono stati addestrati su grandi quantità di testo grezzo in stile auto-supervisionato (*self-supervised*). L'apprendimento auto-supervisionato è un tipo di apprendimento il cui obbiettivo viene computato direttamente dagli input del modello. Ciò significa che non è richiesto alcun intervento umano per etichettare i dati!
 
-Un modello di questo tipo sviluppa una comprensione statistica della lingua alla quale è stato addestrato, ma non è molto utile in compiti pratici precisi. Per questa ragione, il modello pre-addestrato generale viene in seguito sottoposto a un processo detto transfer learning (*apprendimento del trasferimento*). Durante questo processo, il modello viene affinato per un determinato compito in maniera supervisionata (ossia utilizzando etichette generate da umani).
+Un modello di questo tipo sviluppa una comprensione statistica della lingua alla quale è stato addestrato, ma non è molto utile in compiti pratici e precisi. Per questa ragione, il modello pre-addestrato generale viene in seguito sottoposto a un processo detto *transfer learning*. Durante questo processo, il modello viene affinato per un determinato compito in maniera supervisionata (ossia utilizzando etichette generate da umani).
 
-Un esempio di compito è la previsione della parola seguente in una frase di cui sono state lette *n* parole precedenti. Quest'operazione si chiama *causal language modeling* perché il suo output dipende gli input presenti e passati, ma non da quelli futuri.
+Un esempio di compito è la previsione della parola seguente in una frase di cui sono state lette *n* parole precedenti. Quest'operazione si chiama *causal language modeling* perché il suo output dipende dagli input presenti e passati, ma non da quelli futuri.
 
 <div class="flex justify-center">
 <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/causal_modeling.svg" alt="Example of causal language modeling in which the next word from a sentence is predicted.">
@@ -61,7 +61,7 @@ A parte per alcune eccezioni (come DistilBERT), la strategia generale per ottene
 <img src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/model_parameters.png" alt="Number of parameters of recent Transformers models" width="90%">
 </div>
 
-Sfortunatamente, l'addestramento di un modello, e specialmente di un modello grosso, richiede una grande quantità di dati. Ciò si rivela molto costoso in termini di tempo, risorse informatiche e impatto ambientale, come mostrano i grafici qui sotto.
+Sfortunatamente, l'addestramento di un modello, e specialmente di un modello grosso, richiede grandi quantità di dati. Ciò si rivela molto costoso in termini di tempo, risorse informatiche e impatto ambientale, come mostrano i grafici qui sotto.
 
 <div class="flex justify-center">
 <img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter1/carbon_footprint.svg" alt="The carbon footprint of a large language model.">
@@ -70,9 +70,9 @@ Sfortunatamente, l'addestramento di un modello, e specialmente di un modello gro
 
 <Youtube id="ftWlj4FBHTg"/>
 
-E questi dati sono di un progetto per un modello (molto grande) condotto da un team che provava consciamente a ridurre l'impatto ambientale del pre-addestramento! L'impronta di trials volti a ottenere i miglior iperparamenti possibili sarebbe ancora più importante.
+Questi dati si riferiscono a un progetto per un modello (molto grande) condotto da un team che provava consciamente a ridurre l'impatto ambientale del pre-addestramento. L'impronta di trials volti a ottenere i migliori iperparametri possibili sarebbe ancora più importante.
 
-Immagina cosa succederebbe se ogni volta che un gruppo di ricerca, un'organizzazione studentesca o un'azienda vuole addestrare un modello lo facesse da zero. I costi globali sarebbero inutilmente enormi!
+Immagina cosa succederebbe se ogni volta che un gruppo di ricerca, un'organizzazione studentesca o un'azienda vuole addestrare un modello lo facesse da zero! I costi globali sarebbero inutilmente enormi!
 
 Questo è il motivo per cui la condivisione di modelli linguistici è fondamentale: lavorare a partire da modelli già addestrati riduce i costi informatici complessivi e l'impatto ambientale della comunità.
 
@@ -90,7 +90,7 @@ Il pre-addestramento è l'atto di addestrare un modello da zero: i pesi sono ini
 
 Questo pre-addestramento è solitamente fatto su enormi quantità di dati. Di conseguenza, l'addestramento richiede un corpus di dati molto ampio e può prendere diverse settimane.
 
-L'affinamento (*fine-tuning*), al contrarion, è un addestramento che ha luogo **dopo** che il modello è stato pre-addestrato. Per poter performare un fine-tuning, è necessario acquisire un modello linguistico pre-addestrato e addestrarlo ulteriormente con una base dati adatta al compito in questione. Ma perché non addestrare direttamente al compito finale? Esistono alcune ragioni:
+L'affinamento (*fine-tuning*), al contrario, è un addestramento che ha luogo **dopo** che il modello è stato pre-addestrato. Per poter effettuare un fine-tuning, è necessario acquisire un modello linguistico pre-addestrato e addestrarlo ulteriormente con una base dati adatta al compito in questione. Ma perché non addestrare direttamente al compito finale? Esistono alcune ragioni:
 
 *  Il modello pre-addestrato è già addestrato su basi dati che contengono similarità con la base dati usata per il fine-tuning. Il processo di fine-tuning riesce quindi ad beneficiare della conoscenza acquisita dal modello iniziale durante il pre-addestramento (ad esempio, nei problemi di NLP, il modello pre-addestrato avrà già conoscenze statistiche della lingua utilizzata nel compito).
 *  Siccome il modello pre-addestrato è stato addestrato usando moltissimi dati, il fine-tuning richiede molto meno dati per ottenere buoni risultati.

From 386bc29de788efe8971d8ba0ff1f9d6cbf093cc3 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 14 Jun 2022 15:41:40 +0100
Subject: [PATCH 125/127] Final version

---
 chapters/it/chapter1/5.mdx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/chapters/it/chapter1/5.mdx b/chapters/it/chapter1/5.mdx
index 4c90404a2..817c81463 100644
--- a/chapters/it/chapter1/5.mdx
+++ b/chapters/it/chapter1/5.mdx
@@ -2,11 +2,11 @@
 
 <Youtube id="MUqNwgPjJvQ" />
 
-I modelli encoder utilizzano solo l'encoder di un modello Transformer. In ogni fase, i layer di attenzione hanno accesso a tutte le parole della frase di partenza. Questi modelli sono spesso caratterizzati come aventi attenzione "bi-direzionale" e chiamati *auto-encoding models*.
+I modelli encoder utilizzano solo l'encoder di un modello Transformer. In ogni fase, gli attention layer hanno accesso a tutte le parole della frase di partenza. Questi modelli sono spesso caratterizzati come aventi attenzione "bi-direzionale" e chiamati *auto-encoding models*.
 
 Solitamente, il pre-addestramento di questi modelli consiste nel corrompere una determinata frase (ad esempio, nascondendone casualmente alcune parole) e incaricare il modello di ritrovare o ricostruire la frase di partenza.
 
-I modelli encoder sono particolarmente appropriati per compiti che rischiedono la comprensione di frasi intere, quali la classificazione di frasi, riconoscimento delle entità nominate (e in senso più ampio, la classificazione di parole), e l'estrazione di risposte da un contesto.
+I modelli encoder sono particolarmente appropriati per compiti che richiedono la comprensione di frasi intere, quali la classificazione di frasi, il riconoscimento delle entità nominate (e in senso più ampio, la classificazione di parole), e l'estrazione di risposte da un contesto.
 
 Alcuni esempi di modelli di questo tipo includono:
 

From 8981c0a6a9f9160fae543298ec89797623f5c955 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 14 Jun 2022 15:42:31 +0100
Subject: [PATCH 126/127] Final version

---
 chapters/it/chapter1/6.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/chapters/it/chapter1/6.mdx b/chapters/it/chapter1/6.mdx
index a04e5504a..c9a7296a4 100644
--- a/chapters/it/chapter1/6.mdx
+++ b/chapters/it/chapter1/6.mdx
@@ -2,7 +2,7 @@
 
 <Youtube id="d_ixlCubqQw" />
 
-I modelli decoder utilizzano solo il decoder di un modello Transformer. Ad ogni passaggio e per una data parola, i layers di attenzione hanno accesso solo alle parole che la precedono nella frase. Questi modelli sono spesso detti *auto-regressive models*.
+I modelli decoder utilizzano solo il decoder di un modello Transformer. Ad ogni passaggio e per una data parola, gli attention layer hanno accesso solo alle parole che la precedono nella frase. Questi modelli sono spesso detti *auto-regressive models*.
 
 Il pre-addestramento dei modelli decoder ha spesso a che fare con la previsione della parola successiva in un contesto frasale.
 

From 60ab29e8ffead84679882ce8c05e91917f0910e9 Mon Sep 17 00:00:00 2001
From: Caterina Bonan <97481648+CaterinaBi@users.noreply.github.com>
Date: Tue, 14 Jun 2022 16:17:23 +0100
Subject: [PATCH 127/127] Updates ToC

---
 chapters/it/_toctree.yml | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/chapters/it/_toctree.yml b/chapters/it/_toctree.yml
index e1b12f89b..a08938643 100644
--- a/chapters/it/_toctree.yml
+++ b/chapters/it/_toctree.yml
@@ -16,4 +16,20 @@
   - local: chapter1/5
     title: Modelli encoder
   - local: chapter1/6
-    title: Modelli decoder
\ No newline at end of file
+    title: Modelli decoder
+    
+- title: 4. Condividere modelli e tokenizers
+  sections:
+  - local: chapter4/1
+    title: L'Hub di Hugging Face
+  - local: chapter4/2
+    title: Usare modelli pre-addestrati
+  - local: chapter4/3
+    title: Condividere modelli pre-addestrati
+  - local: chapter4/4
+    title: Scrivere un cartellino del modello
+  - local: chapter4/5
+    title: Fine della parte 1!
+  - local: chapter4/6
+    title: Quiz di fine capitolo
+    quiz: 4