From a3d69a8994d673899608a7c17fbf4f953f50474e Mon Sep 17 00:00:00 2001 From: Fanli Lin Date: Thu, 14 Nov 2024 06:16:59 +0800 Subject: [PATCH] [docs] add xpu device check (#34684) * add XPU path * use accelerate API * Update docs/source/en/tasks/semantic_segmentation.md Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> * update more places with accelerate API --------- Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- docs/source/en/generation_strategies.md | 3 ++- docs/source/en/tasks/idefics.md | 4 ++-- docs/source/en/tasks/image_captioning.md | 5 +++-- docs/source/en/tasks/image_feature_extraction.md | 5 +++-- docs/source/en/tasks/image_to_image.md | 5 +++-- .../knowledge_distillation_for_image_classification.md | 4 ++-- docs/source/en/tasks/mask_generation.md | 6 +++--- docs/source/en/tasks/monocular_depth_estimation.md | 5 +++-- docs/source/en/tasks/object_detection.md | 4 +++- docs/source/en/tasks/semantic_segmentation.md | 4 +++- docs/source/en/tasks/text-to-speech.md | 4 ++-- docs/source/en/tasks/visual_question_answering.md | 3 ++- 12 files changed, 31 insertions(+), 21 deletions(-) diff --git a/docs/source/en/generation_strategies.md b/docs/source/en/generation_strategies.md index f892fc05f5c..621edeb20e8 100644 --- a/docs/source/en/generation_strategies.md +++ b/docs/source/en/generation_strategies.md @@ -508,10 +508,11 @@ See the following examples for DoLa decoding with the 32-layer LLaMA-7B model. ```python >>> from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed >>> import torch +>>> from accelerate.test_utils.testing import get_backend >>> tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b") >>> model = AutoModelForCausalLM.from_pretrained("huggyllama/llama-7b", torch_dtype=torch.float16) ->>> device = 'cuda' if torch.cuda.is_available() else 'cpu' +>>> device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.) 
>>> model.to(device) >>> set_seed(42) diff --git a/docs/source/en/tasks/idefics.md b/docs/source/en/tasks/idefics.md index a780124edea..7e3335762ea 100644 --- a/docs/source/en/tasks/idefics.md +++ b/docs/source/en/tasks/idefics.md @@ -386,9 +386,9 @@ The use and prompting for the conversational use is very similar to using the ba ```py >>> import torch >>> from transformers import IdeficsForVisionText2Text, AutoProcessor +>>> from accelerate.test_utils.testing import get_backend ->>> device = "cuda" if torch.cuda.is_available() else "cpu" - +>>> device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.) >>> checkpoint = "HuggingFaceM4/idefics-9b-instruct" >>> model = IdeficsForVisionText2Text.from_pretrained(checkpoint, torch_dtype=torch.bfloat16).to(device) >>> processor = AutoProcessor.from_pretrained(checkpoint) diff --git a/docs/source/en/tasks/image_captioning.md b/docs/source/en/tasks/image_captioning.md index 633ccc491eb..9a78967cb51 100644 --- a/docs/source/en/tasks/image_captioning.md +++ b/docs/source/en/tasks/image_captioning.md @@ -256,8 +256,9 @@ image Prepare image for the model. ```python -device = "cuda" if torch.cuda.is_available() else "cpu" - +from accelerate.test_utils.testing import get_backend +# automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.) +device, _, _ = get_backend() inputs = processor(images=image, return_tensors="pt").to(device) pixel_values = inputs.pixel_values ``` diff --git a/docs/source/en/tasks/image_feature_extraction.md b/docs/source/en/tasks/image_feature_extraction.md index c9d794b0b2b..80b701588b2 100644 --- a/docs/source/en/tasks/image_feature_extraction.md +++ b/docs/source/en/tasks/image_feature_extraction.md @@ -43,8 +43,9 @@ Let's see the pipeline in action. First, initialize the pipeline. 
If you don't p ```python import torch from transformers import pipeline - -DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +from accelerate.test_utils.testing import get_backend +# automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.) +DEVICE, _, _ = get_backend() pipe = pipeline(task="image-feature-extraction", model_name="google/vit-base-patch16-384", device=DEVICE, pool=True) ``` diff --git a/docs/source/en/tasks/image_to_image.md b/docs/source/en/tasks/image_to_image.md index 0bb74b36980..f1c62e47aeb 100644 --- a/docs/source/en/tasks/image_to_image.md +++ b/docs/source/en/tasks/image_to_image.md @@ -37,8 +37,9 @@ We can now initialize the pipeline with a [Swin2SR model](https://huggingface.co ```python from transformers import pipeline import torch - -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') +from accelerate.test_utils.testing import get_backend +# automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.) 
+device, _, _ = get_backend() pipe = pipeline(task="image-to-image", model="caidas/swin2SR-lightweight-x2-64", device=device) ``` diff --git a/docs/source/en/tasks/knowledge_distillation_for_image_classification.md b/docs/source/en/tasks/knowledge_distillation_for_image_classification.md index 530e92d81f5..17fb363df8e 100644 --- a/docs/source/en/tasks/knowledge_distillation_for_image_classification.md +++ b/docs/source/en/tasks/knowledge_distillation_for_image_classification.md @@ -58,7 +58,7 @@ from transformers import TrainingArguments, Trainer import torch import torch.nn as nn import torch.nn.functional as F - +from accelerate.test_utils.testing import get_backend class ImageDistilTrainer(Trainer): def __init__(self, teacher_model=None, student_model=None, temperature=None, lambda_param=None, *args, **kwargs): @@ -66,7 +66,7 @@ class ImageDistilTrainer(Trainer): self.teacher = teacher_model self.student = student_model self.loss_function = nn.KLDivLoss(reduction="batchmean") - device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.) self.teacher.to(device) self.teacher.eval() self.temperature = temperature diff --git a/docs/source/en/tasks/mask_generation.md b/docs/source/en/tasks/mask_generation.md index 82202f58bca..db16e035e30 100644 --- a/docs/source/en/tasks/mask_generation.md +++ b/docs/source/en/tasks/mask_generation.md @@ -125,9 +125,9 @@ the processor. ```python from transformers import SamModel, SamProcessor import torch - -device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - +from accelerate.test_utils.testing import get_backend +# automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.) 
+device, _, _ = get_backend() model = SamModel.from_pretrained("facebook/sam-vit-base").to(device) processor = SamProcessor.from_pretrained("facebook/sam-vit-base") ``` diff --git a/docs/source/en/tasks/monocular_depth_estimation.md b/docs/source/en/tasks/monocular_depth_estimation.md index 3ded3179154..edd22122f32 100644 --- a/docs/source/en/tasks/monocular_depth_estimation.md +++ b/docs/source/en/tasks/monocular_depth_estimation.md @@ -53,8 +53,9 @@ Instantiate a pipeline from a [checkpoint on the Hugging Face Hub](https://huggi ```py >>> from transformers import pipeline >>> import torch - ->>> device = "cuda" if torch.cuda.is_available() else "cpu" +>>> from accelerate.test_utils.testing import get_backend +>>> # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.) +>>> device, _, _ = get_backend() >>> checkpoint = "depth-anything/Depth-Anything-V2-base-hf" >>> pipe = pipeline("depth-estimation", model=checkpoint, device=device) ``` diff --git a/docs/source/en/tasks/object_detection.md b/docs/source/en/tasks/object_detection.md index fdc81896bc1..c307dd3334f 100644 --- a/docs/source/en/tasks/object_detection.md +++ b/docs/source/en/tasks/object_detection.md @@ -1488,7 +1488,9 @@ Now that you have finetuned a model, evaluated it, and uploaded it to the Huggin Load model and image processor from the Hugging Face Hub (skip to use already trained in this session): ```py ->>> device = "cuda" +>>> from accelerate.test_utils.testing import get_backend +>>> # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.) 
+>>> device, _, _ = get_backend() >>> model_repo = "qubvel-hf/detr_finetuned_cppe5" >>> image_processor = AutoImageProcessor.from_pretrained(model_repo) diff --git a/docs/source/en/tasks/semantic_segmentation.md b/docs/source/en/tasks/semantic_segmentation.md index 91257758948..a21ff62edf1 100644 --- a/docs/source/en/tasks/semantic_segmentation.md +++ b/docs/source/en/tasks/semantic_segmentation.md @@ -689,7 +689,9 @@ Reload the dataset and load an image for inference. We will now see how to infer without a pipeline. Process the image with an image processor and place the `pixel_values` on a GPU: ```py ->>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # use GPU if available, otherwise use a CPU +>>> from accelerate.test_utils.testing import get_backend +>>> # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.) +>>> device, _, _ = get_backend() >>> encoding = image_processor(image, return_tensors="pt") >>> pixel_values = encoding.pixel_values.to(device) ``` diff --git a/docs/source/en/tasks/text-to-speech.md b/docs/source/en/tasks/text-to-speech.md index 188d4ea5f9e..e25da4e19ef 100644 --- a/docs/source/en/tasks/text-to-speech.md +++ b/docs/source/en/tasks/text-to-speech.md @@ -282,10 +282,10 @@ containing the corresponding speaker embedding. >>> import os >>> import torch >>> from speechbrain.inference.classifiers import EncoderClassifier +>>> from accelerate.test_utils.testing import get_backend >>> spk_model_name = "speechbrain/spkrec-xvect-voxceleb" - ->>> device = "cuda" if torch.cuda.is_available() else "cpu" +>>> device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.) >>> speaker_model = EncoderClassifier.from_hparams( ... source=spk_model_name, ... 
run_opts={"device": device}, diff --git a/docs/source/en/tasks/visual_question_answering.md b/docs/source/en/tasks/visual_question_answering.md index 7083d8c98b9..87dbfb751bf 100644 --- a/docs/source/en/tasks/visual_question_answering.md +++ b/docs/source/en/tasks/visual_question_answering.md @@ -363,10 +363,11 @@ GPU, if available, which we didn't need to do earlier when training, as [`Traine ```py >>> from transformers import AutoProcessor, Blip2ForConditionalGeneration >>> import torch +>>> from accelerate.test_utils.testing import get_backend >>> processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b") >>> model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch.float16) ->>> device = "cuda" if torch.cuda.is_available() else "cpu" +>>> device, _, _ = get_backend() # automatically detects the underlying device type (CUDA, CPU, XPU, MPS, etc.) >>> model.to(device) ```