huggingface · IlyasMoutawwakil · Aug 6, 2024 · Jul 31, 2024 · Jul 31, 2024 · Jul 31, 2024
diff --git a/.github/workflows/test_ipex.yml b/.github/workflows/test_ipex.yml
@@ -22,27 +22,27 @@ jobs:
       fail-fast: false
       matrix:
         python-version: [3.9]
-        transformers-version: [4.39.0, 4.42.3]
-        ipex-version: [2.2.0, 2.3.*]
+        transformers-version: ["4.39.0", "4.43.*"]
+        ipex-version: ["2.2.0", "2.3.*"]
         include:
           - python-version: 3.8
             transformers-version: 4.39.0
             ipex-version: 2.2.0
 
     steps:
-    - uses: actions/checkout@v2
-    - name: Setup Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install torch==${{ matrix.ipex-version }} --extra-index-url https://download.pytorch.org/whl/cpu
-        pip install intel_extension_for_pytorch==${{ matrix.ipex-version }}
-        pip install Pillow parameterized
-        pip install transformers[testing]==${{ matrix.transformers-version }}
-        pip install .[ipex]
-    - name: Test with Pytest
-      run: |
-        pytest tests/ipex/
+      - uses: actions/checkout@v4
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install torch==${{ matrix.ipex-version }} --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install intel_extension_for_pytorch==${{ matrix.ipex-version }}
+          pip install Pillow parameterized
+          pip install transformers[testing]==${{ matrix.transformers-version }}
+          pip install .[ipex]
+      - name: Test with Pytest
+        run: |
+          pytest tests/ipex/
diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml
@@ -21,36 +21,37 @@ jobs:
       fail-fast: false
       matrix:
         python-version: ["3.8", "3.12"]
-        transformers-version: ["4.36.0", "4.42.*"]
+        transformers-version: ["4.36.0", "4.43.*"]
         os: [ubuntu-latest]
 
     runs-on: ${{ matrix.os }}
     steps:
-    - uses: actions/checkout@v4
-    - name: Setup Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v5
-      with:
-        python-version: ${{ matrix.python-version }}
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        # install PyTorch CPU version to avoid installing CUDA packages on GitHub runner without GPU
-        pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-        pip install transformers==${{ matrix.transformers-version }}
-        pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime
-    - name: Test with Pytest
-      env:
-        HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
-      run: |
-        pytest tests/openvino/ --ignore tests/openvino/test_modeling_basic.py --durations=0
-    - name: Test basic
-      run: |
-        pip uninstall -y nncf
-        pytest tests/openvino/test_modeling_basic.py
-    - name: Test openvino-nightly
-      run: |
-        pip uninstall -y openvino
-        pip install openvino-nightly
-        python -c "from optimum.intel import OVModelForCausalLM; OVModelForCausalLM.from_pretrained('hf-internal-testing/tiny-random-gpt2', export=True, compile=False)"
-        optimum-cli export openvino -m hf-internal-testing/tiny-random-gpt2 gpt2-ov
+      - uses: actions/checkout@v4
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
 
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          # install PyTorch CPU version to avoid installing CUDA packages on GitHub runner without GPU
+          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+          pip install .[openvino,openvino-tokenizers,tests,diffusers] onnxruntime
+          pip install transformers==${{ matrix.transformers-version }}
+
+      - name: Test with Pytest
+        env:
+          HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
+        run: |
+          pytest tests/openvino/ --ignore tests/openvino/test_modeling_basic.py --durations=0
+      - name: Test basic
+        run: |
+          pip uninstall -y nncf
+          pytest tests/openvino/test_modeling_basic.py
+      - name: Test openvino-nightly
+        run: |
+          pip uninstall -y openvino
+          pip install openvino-nightly
+          python -c "from optimum.intel import OVModelForCausalLM; OVModelForCausalLM.from_pretrained('hf-internal-testing/tiny-random-gpt2', export=True, compile=False)"
+          optimum-cli export openvino -m hf-internal-testing/tiny-random-gpt2 gpt2-ov
diff --git a/.github/workflows/test_openvino_basic.yml b/.github/workflows/test_openvino_basic.yml
@@ -3,7 +3,7 @@ name: OpenVINO - Basic Test
 on:
   workflow_dispatch:
   schedule:
-    - cron:  '41 1 * * *'  # run every day at 1:41
+    - cron: "41 1 * * *" # run every day at 01:41 UTC
   push:
     branches:
       - v*-release
@@ -24,40 +24,41 @@ jobs:
         # This also ensures that the test fails if dependencies break for Python 3.7
         python-version: ["3.8", "3.12"]
         os: ["ubuntu-22.04", "windows-latest"]
-        transformers-version: ["4.42.*"]
+        transformers-version: ["4.43.*"]
         include:
-          - transformers-version: "4.36.0"
-            python-version: "3.12"
+          - python-version: "3.12"
             os: "ubuntu-22.04"
+            transformers-version: "4.36.0"
 
     runs-on: ${{ matrix.os }}
 
     steps:
-    - uses: actions/checkout@v4
-    - name: Setup Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v5
-      with:
-        python-version: ${{ matrix.python-version }}
-
-    - name: Install dependencies
-      run: |
-        # Install openvino manually to prevent dependency conflicts when .[openvino] pins
-        # optimum or transformers to a specific version
-        # Install PyTorch CPU to prevent unnecessary downloading/installing of CUDA packages
-        pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
-        pip install transformers==${{ matrix.transformers-version }}
-        pip install .[tests] openvino
-
-    - name: Pip freeze
-      run: pip freeze
-
-    - name: Test with Pytest
-      run: |
-        pytest tests/openvino/test_modeling_basic.py
-
-    - name: Slow tests
-      run: |
-        pip install nncf
-        pytest tests/openvino -s -m "run_slow" --durations=0
-      env:
-        RUN_SLOW: 1
+      - uses: actions/checkout@v4
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          # Install PyTorch CPU to prevent unnecessary downloading/installing of CUDA packages
+          pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+          # Install openvino manually to prevent dependency conflicts when .[openvino] pins
+          # optimum or transformers to a specific version
+          pip install .[tests] openvino
+          pip install transformers==${{ matrix.transformers-version }}
+
+      - name: Pip freeze
+        run: pip freeze
+
+      - name: Test with Pytest
+        run: |
+          pytest tests/openvino/test_modeling_basic.py
+
+      - name: Slow tests
+        run: |
+          pip install nncf
+          pytest tests/openvino -s -m "run_slow" --durations=0
+        env:
+          RUN_SLOW: 1
diff --git a/optimum/exporters/ipex/model_patcher.py b/optimum/exporters/ipex/model_patcher.py
@@ -34,7 +34,7 @@
 
 # Please also update in the setup.py and .github/workflows/test_ipex.yml if you change the transformers version
 _TRANSFORMERS_MIN_VERSION = "4.39.0"
-_TRANSFORMERS_MAX_VERSION = "4.42.3"
+_TRANSFORMERS_MAX_VERSION = "4.43.99"
 
 _IPEX_EXPORTED_GENERATION_TASKS = ("text-generation",)
 

diff --git a/optimum/intel/ipex/modeling_base.py b/optimum/intel/ipex/modeling_base.py
@@ -474,9 +474,11 @@ def __init__(
             self._reorder_cache = _ipex_reorder_cache
         else:
             # Check if _reorder_cache is a static method
-            if isinstance(self.model_cls.__dict__["_reorder_cache"], staticmethod):
+            if "_reorder_cache" in self.model_cls.__dict__ and isinstance(
+                self.model_cls.__dict__["_reorder_cache"], staticmethod
+            ):
                 self._reorder_cache = self.model_cls._reorder_cache
-            else:
+            elif "_reorder_cache" in self.model_cls.__dict__:
                 self._reorder_cache = self.model_cls._reorder_cache.__get__(self)
 
         if is_transformers_version(">=", "4.38.0") and model_type in {"llama", "phi", "persimmon", "mistral"}:

diff --git a/optimum/intel/openvino/modeling.py b/optimum/intel/openvino/modeling.py
@@ -129,7 +129,6 @@ def __init__(self, model: openvino.runtime.Model, config: transformers.Pretraine
         # Avoid warnings when creating a transformers pipeline
         AutoConfig.register(self.base_model_prefix, AutoConfig)
         self.auto_model_class.register(AutoConfig, self.__class__)
-        self.device = torch.device("cpu")
 
     def to(self, device: str):
         """

diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py
@@ -20,6 +20,7 @@
 from typing import Dict, Optional, Union
 
 import openvino
+import torch
 from huggingface_hub import hf_hub_download
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from openvino import Core, convert_model
@@ -34,7 +35,7 @@
 from ...exporters.openvino import export, main_export
 from ..utils.import_utils import is_nncf_available
 from .configuration import OVConfig, OVDynamicQuantizationConfig, OVWeightQuantizationConfig
-from .utils import ONNX_WEIGHTS_NAME, OV_XML_FILE_NAME, _print_compiled_model_properties
+from .utils import ONNX_WEIGHTS_NAME, OV_TO_PT_TYPE, OV_XML_FILE_NAME, _print_compiled_model_properties
 
 
 core = Core()
@@ -77,16 +78,27 @@ def __init__(
             model = self._reshape(model, -1, -1, height, width)
 
         input_names = {}
+        input_dtypes = {}
         for idx, key in enumerate(model.inputs):
             names = tuple(key.get_names())
             input_names[next((name for name in names if "/" not in name), names[0])] = idx
+            input_dtypes[
+                next((name for name in names if "/" not in name), names[0])
+            ] = key.get_element_type().get_type_name()
         self.input_names = input_names
+        self.input_dtypes = input_dtypes
 
         output_names = {}
+        output_dtypes = {}
         for idx, key in enumerate(model.outputs):
             names = tuple(key.get_names())
             output_names[next((name for name in names if "/" not in name), names[0])] = idx
+            output_dtypes[
+                next((name for name in names if "/" not in name), names[0])
+            ] = key.get_element_type().get_type_name()
+
         self.output_names = output_names
+        self.output_dtypes = output_dtypes
 
         self.model = model
         self.request = None
@@ -103,6 +115,27 @@ def __init__(
         if enable_compilation:
             self.compile()
 
+    @property
+    def device(self) -> torch.device:
+        """
+        `torch.device`: The device on which the module is (for torch compatibility).
+        """
+        return torch.device("cpu")
+
+    @property
+    def dtype(self) -> Optional[torch.dtype]:
+        """
+        `torch.dtype`: The first floating-point dtype among the inputs, then the outputs, if any.
+        """
+        for ov_dtypes in (self.input_dtypes, self.output_dtypes):
+            for dtype in ov_dtypes.values():
+                torch_dtype = OV_TO_PT_TYPE.get(dtype)
+                # skip element types missing from OV_TO_PT_TYPE (`.get` returns None for them)
+                if torch_dtype is not None and torch_dtype.is_floating_point:
+                    return torch_dtype
+
+        return None
+
     @staticmethod
     def load_model(
         file_name: Union[str, Path],

diff --git a/optimum/intel/openvino/modeling_base_seq2seq.py b/optimum/intel/openvino/modeling_base_seq2seq.py
@@ -350,6 +350,8 @@ def _reshape(self, model: openvino.runtime.Model, batch_size: int, sequence_leng
             shapes[inputs][0] = batch_size if not is_decoder else -1
             if inputs.get_any_name().startswith("past_key_values"):
                 shapes[inputs][2] = -1
+            elif inputs.get_any_name().startswith("cache_position"):
+                shapes[inputs][0] = sequence_length
             elif is_decoder and not inputs.get_any_name().startswith("encoder"):
                 shapes[inputs][1] = -1
             else:

diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
@@ -25,6 +25,7 @@
 import numpy as np
 import openvino
 import PIL
+import torch
 from diffusers import (
     DDIMScheduler,
     LMSDiscreteScheduler,
@@ -422,10 +423,6 @@ def to(self, device: str):
 
         return self
 
-    @property
-    def device(self) -> str:
-        return self._device.lower()
-
     @property
     def height(self) -> int:
         height = self.unet.model.inputs[0].get_partial_shape()[2]
@@ -631,21 +628,25 @@ def _compile(self):
             if (
                 "CACHE_DIR" not in self.ov_config.keys()
                 and not str(self._model_dir).startswith(gettempdir())
-                and "gpu" in self.device.lower()
+                and "GPU" in self._device
             ):
                 self.ov_config["CACHE_DIR"] = os.path.join(self._model_dir, self._model_name, "model_cache")
 
-            logger.info(f"Compiling the {self._model_name} to {self.device} ...")
-            self.request = core.compile_model(self.model, self.device, self.ov_config)
+            logger.info(f"Compiling the {self._model_name} to {self._device} ...")
+            self.request = core.compile_model(self.model, self._device, self.ov_config)
             # OPENVINO_LOG_LEVEL can be found in https://docs.openvino.ai/2023.2/openvino_docs_OV_UG_supported_plugins_AUTO_debugging.html
             if "OPENVINO_LOG_LEVEL" in os.environ and int(os.environ["OPENVINO_LOG_LEVEL"]) > 2:
-                logger.info(f"{self.device} SUPPORTED_PROPERTIES:")
+                logger.info(f"{self._device} SUPPORTED_PROPERTIES:")
                 _print_compiled_model_properties(self.request)
 
     @property
-    def device(self):
+    def _device(self) -> str:
         return self.parent_model._device
 
+    @property
+    def device(self) -> torch.device:
+        return self.parent_model.device
+
 
 class OVModelTextEncoder(OVModelPart):
     def __init__(
@@ -717,7 +718,7 @@ def __call__(self, latent_sample: np.ndarray):
         return list(outputs.values())
 
     def _compile(self):
-        if "GPU" in self.device:
+        if "GPU" in self._device:
             self.ov_config.update({"INFERENCE_PRECISION_HINT": "f32"})
         super()._compile()
 
@@ -738,7 +739,7 @@ def __call__(self, sample: np.ndarray):
         return list(outputs.values())
 
     def _compile(self):
-        if "GPU" in self.device:
+        if "GPU" in self._device:
             self.ov_config.update({"INFERENCE_PRECISION_HINT": "f32"})
         super()._compile()