Skip to content

Commit

Permalink
LLaVA OV: fix unpadding precision (huggingface#34779)
Browse files (browse the repository at this point in the history)
* fix

* propagate

* type check
  • Loading branch information
zucchini-nlp authored and BernardZach committed Dec 5, 2024
1 parent ae1497a commit 87015b1
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 3 deletions.
4 changes: 3 additions & 1 deletion src/transformers/models/llava_next/processing_llava_next.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,9 @@ def __call__(
for sample in text:
while self.image_token in sample:
image_size = next(image_sizes)
- orig_height, orig_width = image_size
+ if not isinstance(image_size, (list, tuple)):
+ # cast to list to avoid numerical precision errors when calculating unpadding
+ orig_height, orig_width = image_size.tolist()
num_image_tokens = self._get_number_of_features(orig_height, orig_width, height, width)
if self.vision_feature_select_strategy == "default":
num_image_tokens -= self.num_additional_image_tokens
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,9 @@ def __call__(
for sample in text:
while self.image_token in sample:
image_size = next(image_sizes)
- orig_height, orig_width = image_size
+ if not isinstance(image_size, (list, tuple)):
+ # cast to list to avoid numerical precision errors when calculating unpadding
+ orig_height, orig_width = image_size.tolist()
num_image_tokens = self._get_number_of_features(orig_height, orig_width, height, width)
if self.vision_feature_select_strategy == "default":
num_image_tokens -= self.num_additional_image_tokens
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,10 @@ def _expand_image_tokens(
for sample in text:
while special_token in sample:
image_size_list = next(image_sizes)
- orig_height, orig_width = image_size_list[0] if num_frames != 1 else image_size_list
+ original_size = image_size_list[0] if num_frames != 1 else image_size_list
+ if not isinstance(original_size, (list, tuple)):
+ # cast to list to avoid numerical precision errors when calculating unpadding
+ orig_height, orig_width = original_size.tolist()
num_image_tokens = self._get_number_of_features(orig_height, orig_width, height, width)
if self.vision_feature_select_strategy == "default":
num_image_tokens -= 1
Expand Down

0 comments on commit 87015b1

Please sign in to comment.