
Add eva02 tests (PaddlePaddle#168)
nemonameless authored Sep 19, 2023
1 parent bc3428d commit a3243e5
Showing 8 changed files with 436 additions and 59 deletions.
7 changes: 1 addition & 6 deletions paddlemix/examples/eva02/run_eva02_pretrain_dist.py
@@ -15,8 +15,6 @@
 import os
 import sys
 
-import numpy as np
-
 parent_path = os.path.abspath(os.path.join(__file__, *([".."] * 4)))
 sys.path.insert(0, parent_path)
 import pprint
@@ -308,15 +306,12 @@ def __init__(self, processor, mode="train"):
 
     def __call__(self, data_list):
         images = [sample[0] for sample in data_list]
-        labels = [sample[-1] for sample in data_list]
+        # get labels from teacher's clip_features
         batch = self.processor(
             images=images,
             return_tensors="pd",
             mode=self.mode,
         )
-        batch.update(
-            {"labels": paddle.to_tensor(np.array(labels))},
-        )
         return batch


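With this change the collator returns only the processor's image batch: the "labels" entry is gone, and the distillation targets are instead computed inside the model from the frozen teacher's CLIP features (see the modeling_pretrain.py diff below). A minimal sketch of the new contract, where the collator class name EVA02Collator is an assumption:

collator = EVA02Collator(processor, mode="train")  # class name is hypothetical
batch = collator(data_list)                        # processed image tensors only
assert "labels" not in batch                       # targets now come from the teacher
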
11 changes: 1 addition & 10 deletions paddlemix/models/clip/modules/fusedln.py
@@ -11,16 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# import sys
-# your should add the path to sys.path if the root isn't in the system ENV "PATH"
-# sys.path.insert(
-#     0,
-#     '/usr/local/lib/python3.7/site-packages/fast_ln-0.0.0-py3.7-linux-x86_64.egg/'
-# )
-# sys.path.insert(
-#     0,
-#     '/usr/local/lib/python3.7/site-packages/fused_ln-0.0.0-py3.7-linux-x86_64.egg/'
-# )
+
 import distutils.util
 import importlib
 import os
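The deleted comments told users to hand-edit sys.path so Python could find the fast_ln / fused_ln egg installs. The imports that survive (distutils.util, importlib, os) point to the extensions being discovered programmatically instead; a rough sketch of that pattern, in which the paddle.nn.LayerNorm fallback is an assumption rather than something shown in this diff:

import importlib

import paddle


def _try_import(name):
    # Return the optional extension module if it is installed, else None.
    try:
        return importlib.import_module(name)
    except ImportError:
        return None


fast_ln = _try_import("fast_ln")
fused_ln = _try_import("fused_ln")
if fast_ln is None and fused_ln is None:
    LayerNorm = paddle.nn.LayerNorm  # assumed fallback when neither op is built
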
15 changes: 9 additions & 6 deletions paddlemix/models/eva02/modeling_pretrain.py
@@ -59,7 +59,7 @@ class EVA02VisionTransformerForMIMConfig(PretrainedConfig):
 
     def __init__(
         self,
-        img_size=224,
+        image_size=224,
         patch_size=14,
         in_chans=3,
         embed_dim=768,
@@ -95,7 +95,7 @@ def __init__(
     ):
         kwargs["return_dict"] = kwargs.pop("return_dict", True)
         super().__init__(**kwargs)
-        self.img_size = img_size
+        self.image_size = image_size
         self.patch_size = patch_size
         self.in_chans = in_chans
         self.embed_dim = embed_dim
@@ -156,7 +156,7 @@ class EVA02VisionTransformerForMIMPretrainedModel(MixPretrainedModel):
 class EVA02VisionTransformerForMIM(EVA02VisionTransformerForMIMPretrainedModel):
     def __init__(self, config: EVA02VisionTransformerForMIMConfig):
         super(EVA02VisionTransformerForMIM, self).__init__(config)
-        self.image_size = config.img_size
+        self.image_size = config.image_size
         self.enable_recompute = config.enable_recompute
         self.embed_dim = embed_dim = config.embed_dim
         self.swiglu = config.swiglu
@@ -205,7 +205,7 @@ def __init__(self, config: EVA02VisionTransformerForMIMConfig):
 
         if config.rope:
             half_head_dim = embed_dim // num_heads // 2
-            hw_seq_len = config.img_size // config.patch_size
+            hw_seq_len = config.image_size // config.patch_size
             self.rope = VisionRotaryEmbeddingFast(dim=half_head_dim, pt_seq_len=hw_seq_len, ft_seq_len=None)
         else:
             self.rope = None
@@ -529,19 +529,22 @@ def set_grad_checkpointing(self, enable=True):
         self.teacher.set_grad_checkpointing(enable)
         self.student.set_grad_checkpointing(enable)
 
-    def forward(self, samples, image, bool_masked_pos, **kwargs):
+    def forward(self, samples, image, bool_masked_pos, get_feats=False):
         # [bs, 3, 224, 224] [bs, 3, 224, 224] [bs, 256]
         if self.beit_like:
             with paddle.no_grad(), paddle.amp.auto_cast():
                 clip_features = self.teacher.encode_image(image)  # [bs, 256, 1024]
                 bool_masked_pos = bool_masked_pos.flatten(start_axis=1).cast("bool")  # [bs, 256]
                 labels = clip_features[bool_masked_pos]  # [N, 1024]
 
             with paddle.amp.auto_cast():
-                outputs = self.student(samples, bool_masked_pos=bool_masked_pos)
+                outputs = self.student(samples, bool_masked_pos=bool_masked_pos)  # [N, 1024]
 
             loss = compute_loss(outputs, labels)
         else:
             raise ValueError
+        if get_feats:
+            return outputs
         return loss


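Two caller-visible changes in this file: the config field img_size is renamed to image_size, and forward trades the silent **kwargs for an explicit get_feats flag that returns the student's masked-token features instead of the distillation loss. A hedged sketch of both, relying only on the class names and defaults visible above:

from paddlemix.models.eva02.modeling_pretrain import (
    EVA02VisionTransformerForMIM,
    EVA02VisionTransformerForMIMConfig,
)

# Renamed key: pass image_size, not img_size.
config = EVA02VisionTransformerForMIMConfig(image_size=224, patch_size=14)
student = EVA02VisionTransformerForMIM(config)

# On the teacher-student wrapper (its class name sits outside this view):
#   loss  = mim_model(samples, image, bool_masked_pos)
#   feats = mim_model(samples, image, bool_masked_pos, get_feats=True)  # [N, 1024]
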
9 changes: 0 additions & 9 deletions tests/models/test_blip2.py
@@ -16,7 +16,6 @@
 import sys
 
 sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
-import copy
 import inspect
 import tempfile
 import unittest
@@ -45,14 +44,6 @@
 from tests.testing_utils import slow
 
 
-def _config_zero_init(config):
-    configs_no_init = copy.deepcopy(config)
-    for key in configs_no_init.__dict__.keys():
-        if "_range" in key or "_std" in key or "initializer_factor" in key or "layer_scale" in key:
-            setattr(configs_no_init, key, 1e-10)
-    return configs_no_init
-
-
 class Blip2VisionModelTester:
     def __init__(
         self,
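The _config_zero_init helper removed here (and from test_clip.py and test_coca.py below) deep-copies a config and forces every initializer range and scale to roughly zero, making weight-initialization tests deterministic; presumably the three duplicate copies were consolidated into one shared location, though the destination is not visible in this view. A sketch of how such a helper is typically used, where tester.get_config(), model_class, and the tolerance are assumptions:

config = _config_zero_init(tester.get_config())  # every init scale forced to ~1e-10
model = model_class(config)
for name, param in model.named_parameters():
    if not param.stop_gradient:
        # with near-zero init ranges, trainable weights should start near zero
        assert float(param.abs().mean()) < 1e-5, f"{name} is not near zero"
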
9 changes: 0 additions & 9 deletions tests/models/test_clip.py
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import copy
 import inspect
 import unittest
 
@@ -34,14 +33,6 @@
 CLIP_PRETRAINED_MODEL_ARCHIVE_LIST = ["paddlemix/CLIP/Vit_L-14"]
 
 
-def _config_zero_init(config):
-    configs_no_init = copy.deepcopy(config)
-    for key in configs_no_init.__dict__.keys():
-        if "_range" in key or "_std" in key or "initializer_factor" in key or "layer_scale" in key:
-            setattr(configs_no_init, key, 1e-10)
-    return configs_no_init
-
-
 class VisionTransformerModelTester:
     def __init__(
         self,

9 changes: 0 additions & 9 deletions tests/models/test_coca.py
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import copy
 import inspect
 import unittest
 
@@ -36,14 +35,6 @@
 CoCa_PRETRAINED_MODEL_ARCHIVE_LIST = ["paddlemix/CoCa/coca_Vit-L-14"]
 
 
-def _config_zero_init(config):
-    configs_no_init = copy.deepcopy(config)
-    for key in configs_no_init.__dict__.keys():
-        if "_range" in key or "_std" in key or "initializer_factor" in key or "layer_scale" in key:
-            setattr(configs_no_init, key, 1e-10)
-    return configs_no_init
-
-
 class MultimodalTransformerModelTester:
     def __init__(
         self,

(The diffs for the remaining two changed files, presumably the new eva02 tests, did not load in this view.)
