Skip to content

Commit

Permalink
fix resolution problem for swin transformer and clip vit (#3021)
Browse files Browse the repository at this point in the history
* Update foundation_vit.py

Update .gitignore

fix time cost problem

Update swin_transformer.py

fix the speed and memory problem

reduce the unnecessary calculation when patch matches resolution

fix conflict

remove check resolution function

Revert "fix conflict"

This reverts commit d7a7dad.

fix conflict

remove the conflict checkpoint function

【Hackathon 5th No.69】 分类大模型--人体视觉任务SOLIDER (#2995)

* add_solider

* add_solider

* add_solider

* add_solider

* add_solider

* add_solider

* add_solider

* add_solider

* add_solider

* add_solider

* add_solider

update doc about PPHGNetV2 (#3002)

fix clip patch embedding resolution problem

support non 224 resolution

integrate the padding functions into one

adjust function name

fix the resolution problem for the CLIP vision transformer part and the Swin Transformer

fix the resolution problem for the CLIP vision transformer part and the Swin Transformer

* fix cache problem

using the huggingface plan and drop the cache

* Revert "fix cache problem"

This reverts commit 8f7ab55.

* fix resolution problem

* update big model backbone

* Revert "update big model backbone"

This reverts commit 04a39f7.
  • Loading branch information
psky1111 authored Oct 31, 2023
1 parent 60478c3 commit 61f748d
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 3 deletions.
12 changes: 9 additions & 3 deletions ppcls/arch/backbone/legendary_models/swin_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ def pading_for_not_divisible(pixel_values,
function="split"):
if isinstance(patch_size, int):
patch_size = (patch_size, patch_size)
if height % patch_size[0] == 0 and width % patch_size[1] == 0:
return pixel_values, (0, 0, 0, 0, 0, 0, 0, 0)
if function == "split":
pading_width = patch_size[1] - width % patch_size[1]
pading_height = patch_size[0] - height % patch_size[0]
Expand Down Expand Up @@ -407,7 +409,7 @@ def __init__(self,
act_layer=act_layer,
drop=drop)
H, W = self.input_resolution
attn_mask = paddle.zeros([1, H, W, 1])
attn_mask = None

self.register_buffer("attn_mask", attn_mask)

Expand Down Expand Up @@ -450,6 +452,9 @@ def forward(self, x, input_dimensions):
x, pad_values = pading_for_not_divisible(x, H, W, self.window_size,
"BHWC")
_, height_pad, width_pad, _ = x.shape

padding_state = pad_values[3] > 0 or pad_values[
5] > 0 # change variable name
# cyclic shift
if self.shift_size > 0:
shifted_x = RollWrapper.roll(
Expand All @@ -465,7 +470,9 @@ def forward(self, x, input_dimensions):
C]) # nW*B, window_size*window_size, C

# W-MSA/SW-MSA
# TODO: check whether the attention mask needs to be recomputed on every call
attn_mask = self.get_attn_mask(height_pad, width_pad, x.dtype)

attn_windows = self.attn(
x_windows, mask=attn_mask) # nW*B, window_size*window_size, C

Expand All @@ -484,8 +491,7 @@ def forward(self, x, input_dimensions):
else:
x = shifted_x

was_padded = pad_values[3] > 0 or pad_values[5] > 0
if was_padded:
if padding_state:
x = x[:, :H, :W, :]
x = x.reshape([B, H * W, C])

Expand Down
2 changes: 2 additions & 0 deletions ppcls/arch/backbone/model_zoo/foundation_vit.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ def pading_for_not_divisible(pixel_values,
function="split"):
if isinstance(patch_size, int):
patch_size = (patch_size, patch_size)
if height % patch_size[0] == 0 and width % patch_size[1] == 0:
return pixel_values, None
if function == "split":
pading_width = patch_size[1] - width % patch_size[1]
pading_height = patch_size[0] - height % patch_size[0]
Expand Down

0 comments on commit 61f748d

Please sign in to comment.