
add_solider
Yang-Changhui committed Oct 15, 2023
1 parent d6d91b3 commit 1812f81
Showing 8 changed files with 51 additions and 51 deletions.
36 changes: 20 additions & 16 deletions ppcls/arch/backbone/legendary_models/swin_transformer.py
@@ -27,21 +27,22 @@

MODEL_URLS = {
"SwinTransformer_tiny_patch4_window7_224":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/SwinTransformer_tiny_patch4_window7_224_pretrained.pdparams",
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/SwinTransformer_tiny_patch4_window7_224_pretrained.pdparams",
"SwinTransformer_small_patch4_window7_224":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/SwinTransformer_small_patch4_window7_224_pretrained.pdparams",
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/SwinTransformer_small_patch4_window7_224_pretrained.pdparams",
"SwinTransformer_base_patch4_window7_224":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/SwinTransformer_base_patch4_window7_224_pretrained.pdparams",
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/SwinTransformer_base_patch4_window7_224_pretrained.pdparams",
"SwinTransformer_base_patch4_window12_384":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/SwinTransformer_base_patch4_window12_384_pretrained.pdparams",
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/SwinTransformer_base_patch4_window12_384_pretrained.pdparams",
"SwinTransformer_large_patch4_window7_224":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/SwinTransformer_large_patch4_window7_224_pretrained.pdparams",
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/SwinTransformer_large_patch4_window7_224_pretrained.pdparams",
"SwinTransformer_large_patch4_window12_384":
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/SwinTransformer_large_patch4_window12_384_pretrained.pdparams",
"https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/SwinTransformer_large_patch4_window12_384_pretrained.pdparams",
}

__all__ = list(MODEL_URLS.keys())
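For context, the sketch below shows how a checkpoint listed in MODEL_URLS is typically fetched and applied in Paddle. It is illustrative only: load_from_registry is a made-up helper, not the repository's _load_pretrained, and it assumes paddle.utils.download is available.

# Illustrative only -- not the repository's _load_pretrained.
import paddle
from paddle.utils.download import get_weights_path_from_url

def load_from_registry(model, arch_name):
    url = MODEL_URLS[arch_name]                    # look up the .pdparams URL
    weights_path = get_weights_path_from_url(url)  # download, or reuse the local cache
    state_dict = paddle.load(weights_path)         # read the parameter dict
    model.set_state_dict(state_dict)               # copy the weights into the model
    return model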


# The following re-implementation of roll is inspired by
# https://gitee.com/ascend/pytorch/blob/master/torch_npu/contrib/function/roll.py
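The roll helper itself is not visible in this hunk; the sketch below only illustrates the usual slice-and-concat emulation that such re-implementations use. roll_sketch is a hypothetical name, and the file's actual code may differ.

# Hedged sketch of a slice-and-concat roll, not the repository's helper.
import paddle

def roll_sketch(x, shifts, axes):
    # For each axis, move the last `shift` slices to the front (wrap-around).
    for shift, axis in zip(shifts, axes):
        size = x.shape[axis]
        shift = shift % size
        if shift == 0:
            continue
        head = paddle.slice(x, axes=[axis], starts=[size - shift], ends=[size])
        tail = paddle.slice(x, axes=[axis], starts=[0], ends=[size - shift])
        x = paddle.concat([head, tail], axis=axis)
    return x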

@@ -195,7 +196,7 @@ def __init__(self,
self.window_size = window_size # Wh, Ww
self.num_heads = num_heads
head_dim = dim // num_heads
self.scale = qk_scale or head_dim**-0.5
self.scale = qk_scale or head_dim ** -0.5

# define a parameter table of relative position bias
# 2*Wh-1 * 2*Ww-1, nH
@@ -220,7 +221,7 @@ def __init__(self,
relative_coords = relative_coords.transpose(
[1, 2, 0]) # Wh*Ww, Wh*Ww, 2
relative_coords[:, :, 0] += self.window_size[
0] - 1  # shift to start from 0
relative_coords[:, :, 1] += self.window_size[1] - 1
relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1
relative_position_index = relative_coords.sum(-1) # Wh*Ww, Wh*Ww
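As a worked illustration of the indexing above, assume a toy window_size of (2, 2) (smaller than any real configuration here): every pair of positions inside the window maps to one of (2*Wh-1)*(2*Ww-1) = 9 entries in the bias table.

# Worked example of the relative position index for window_size = (2, 2).
Wh, Ww = 2, 2
positions = [(h, w) for h in range(Wh) for w in range(Ww)]
for h1, w1 in positions:
    for h2, w2 in positions:
        dh, dw = h1 - h2, w1 - w2                             # relative offset
        index = (dh + Wh - 1) * (2 * Ww - 1) + (dw + Ww - 1)  # same shift/scale as above
        print((h1, w1), (h2, w2), index)                      # index ranges over 0..8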
@@ -262,7 +263,7 @@ def forward(self, x, mask=None):
B_, N, C = x.shape
qkv = self.qkv(x).reshape(
[B_, N, 3, self.num_heads, C // self.num_heads]).transpose(
[2, 0, 3, 1, 4])
q, k, v = qkv[0], qkv[1], qkv[2]

q = q * self.scale
@@ -407,12 +408,14 @@ def __init__(self,
attn_mask = None

self.register_buffer("attn_mask", attn_mask)

def check_condition(self):
if min(self.input_resolution) <= self.window_size:
# if window size is larger than input resolution, we don't partition windows
self.shift_size = 0
self.window_size = min(self.input_resolution)
assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size"
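To make the clamping in check_condition concrete, here is a small worked case with illustrative numbers (not taken from an actual stage of this model):

# A 6x6 feature map cannot host a 7x7 window, so the window shrinks to the
# full map and the cyclic shift is disabled.
input_resolution, window_size, shift_size = (6, 6), 7, 3
if min(input_resolution) <= window_size:
    shift_size = 0
    window_size = min(input_resolution)
assert (window_size, shift_size) == (6, 0)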

def forward(self, x):
H, W = self.input_resolution
B, L, C = x.shape
@@ -676,7 +679,7 @@ def forward(self, x):
def flops(self):
Ho, Wo = self.patches_resolution
flops = Ho * Wo * self.embed_dim * self.in_chans * (
self.patch_size[0] * self.patch_size[1])
if self.norm is not None:
flops += Ho * Wo * self.embed_dim
return flops
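As a quick numeric check of the patch-embedding FLOPs formula above, assuming a 224x224 input with patch_size 4, in_chans 3 and embed_dim 96 (the tiny/small configuration):

# Numeric check of PatchEmbed.flops() with illustrative tiny-model numbers.
Ho, Wo = 224 // 4, 224 // 4                        # 56 x 56 patches
embed_dim, in_chans = 96, 3
flops = Ho * Wo * embed_dim * in_chans * (4 * 4)   # patch projection
flops += Ho * Wo * embed_dim                       # norm term, if self.norm is set
print(flops)                                       # 14,751,744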
@@ -735,7 +738,7 @@ def __init__(self,
self.embed_dim = embed_dim
self.ape = ape
self.patch_norm = patch_norm
self.num_features = int(embed_dim * 2**(self.num_layers - 1))
self.num_features = int(embed_dim * 2 ** (self.num_layers - 1))
self.mlp_ratio = mlp_ratio

# split image into non-overlapping patches
@@ -766,9 +769,9 @@ def __init__(self,
self.layers = nn.LayerList()
for i_layer in range(self.num_layers):
layer = BasicLayer(
dim=int(embed_dim * 2**i_layer),
input_resolution=(patches_resolution[0] // (2**i_layer),
patches_resolution[1] // (2**i_layer)),
dim=int(embed_dim * 2 ** i_layer),
input_resolution=(patches_resolution[0] // (2 ** i_layer),
patches_resolution[1] // (2 ** i_layer)),
depth=depths[i_layer],
num_heads=num_heads[i_layer],
window_size=window_size,
@@ -826,7 +829,7 @@ def flops(self):
for _, layer in enumerate(self.layers):
flops += layer.flops()
flops += self.num_features * self.patches_resolution[
0] * self.patches_resolution[1] // (2**self.num_layers)
0] * self.patches_resolution[1] // (2 ** self.num_layers)
flops += self.num_features * self.num_classes
return flops

@@ -836,7 +839,8 @@ def _load_pretrained(pretrained,
model_url,
use_ssld=False,
use_imagenet22k_pretrained=False,
use_imagenet22kto1k_pretrained=False):
use_imagenet22kto1k_pretrained=False,
**kwargs):
if pretrained is False:
pass
elif pretrained is True:
56 changes: 26 additions & 30 deletions ppcls/arch/backbone/variant_models/swin_transformer_variant.py
@@ -2,11 +2,18 @@
import paddle
import paddle.nn as nn
from ..legendary_models.swin_transformer import SwinTransformer, _load_pretrained, \
MODEL_URLS, PatchEmbed, BasicLayer,SwinTransformerBlock
PatchEmbed, BasicLayer, SwinTransformerBlock

__all__ = ["SwinTransformer_tiny_patch4_window7_224_SOLIDER",
"SwinTransformer_small_patch4_window7_224_SOLIDER",
"SwinTransformer_base_patch4_window7_224_SOLIDER"]
MODEL_URLS_SOLIDER = {
"SwinTransformer_tiny_patch4_window7_224_SOLIDER":
'https://paddleclas.bj.bcebos.com/models/SOILDER/SwinTransformer_tiny_patch4_window7_224_pretrained.pdparams',
"SwinTransformer_small_patch4_window7_224_SOLIDER":
'https://paddleclas.bj.bcebos.com/models/SOILDER/SwinTransformer_small_patch4_window7_224_pretrained.pdparams',
"SwinTransformer_base_patch4_window7_224_SOLIDER":
'https://paddleclas.bj.bcebos.com/models/SOILDER/SwinTransformer_base_patch4_window7_224_pretrained.pdparams'
}

__all__ = list(MODEL_URLS_SOLIDER.keys())


class PatchEmbed_SOLIDER(PatchEmbed):
@@ -18,6 +25,7 @@ def forward(self, x):
x = self.norm(x)
return x, out_size


class SwinTransformerBlock_SOLIDER(SwinTransformerBlock):
r""" Swin Transformer Block.
@@ -51,7 +59,7 @@ def __init__(self,
drop_path=0.,
act_layer=nn.GELU,
norm_layer=nn.LayerNorm):
super(SwinTransformerBlock_SOLIDER,self).__init__(
super(SwinTransformerBlock_SOLIDER, self).__init__(
dim=dim,
input_resolution=input_resolution,
num_heads=num_heads,
@@ -73,12 +81,15 @@ def __init__(self,
self.shift_size = shift_size
self.mlp_ratio = mlp_ratio
self.check_condition()

def check_condition(self):
if min(self.input_resolution) < self.window_size:
# if window size is larger than input resolution, we don't partition windows
self.shift_size = 0
self.window_size = min(self.input_resolution)
assert 0 <= self.shift_size < self.window_size, "shift_size must in 0-window_size"


class BasicLayer_SOLIDER(BasicLayer):
def __init__(self,
dim,
@@ -96,7 +107,7 @@ def __init__(self,
downsample=None,
use_checkpoint=False):

super(BasicLayer_SOLIDER,self).__init__(
super(BasicLayer_SOLIDER, self).__init__(
dim=dim,
input_resolution=input_resolution,
depth=depth,
@@ -291,9 +302,6 @@ def forward_features(self, x, semantic_weight=None):

def SwinTransformer_tiny_patch4_window7_224_SOLIDER(
pretrained=False,
use_ssld=False,
use_imagenet22k_pretrained=False,
use_imagenet22kto1k_pretrained=False,
**kwargs):
model = SwinTransformer_SOLIDER(
embed_dim=96,
@@ -304,19 +312,14 @@ def SwinTransformer_tiny_patch4_window7_224_SOLIDER(
**kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["SwinTransformer_tiny_patch4_window7_224"],
use_ssld=use_ssld,
use_imagenet22k_pretrained=use_imagenet22k_pretrained,
use_imagenet22kto1k_pretrained=use_imagenet22kto1k_pretrained)
model=model,
model_url=MODEL_URLS_SOLIDER["SwinTransformer_tiny_patch4_window7_224_SOLIDER"],
**kwargs)
return model
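A hedged usage sketch for the builder above: pretrained=True is expected to fetch the MODEL_URLS_SOLIDER checkpoint through _load_pretrained, while the scalar semantic_weight value and the exact contents of the returned features are assumptions not shown in this diff.

# Usage sketch (not taken from the repository's docs or tests).
import paddle

model = SwinTransformer_tiny_patch4_window7_224_SOLIDER(pretrained=True)
x = paddle.randn([1, 3, 224, 224])
# forward_features accepts a semantic_weight (see the hunk header above);
# passing a plain float here is an assumption about the Paddle port.
feats = model.forward_features(x, semantic_weight=0.2)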


def SwinTransformer_small_patch4_window7_224_SOLIDER(
pretrained=False,
use_ssld=False,
use_imagenet22k_pretrained=False,
use_imagenet22kto1k_pretrained=False,
**kwargs):
model = SwinTransformer_SOLIDER(
embed_dim=96,
@@ -327,19 +330,14 @@ def SwinTransformer_small_patch4_window7_224_SOLIDER(
**kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["SwinTransformer_small_patch4_window7_224"],
use_ssld=use_ssld,
use_imagenet22k_pretrained=use_imagenet22k_pretrained,
use_imagenet22kto1k_pretrained=use_imagenet22kto1k_pretrained)
model=model,
model_url=MODEL_URLS_SOLIDER["SwinTransformer_small_patch4_window7_224_SOLIDER"],
**kwargs)
return model


def SwinTransformer_base_patch4_window7_224_SOLIDER(
pretrained=False,
use_ssld=False,
use_imagenet22k_pretrained=False,
use_imagenet22kto1k_pretrained=False,
**kwargs):
model = SwinTransformer_SOLIDER(
embed_dim=128,
@@ -350,9 +348,7 @@ def SwinTransformer_base_patch4_window7_224_SOLIDER(
**kwargs)
_load_pretrained(
pretrained,
model,
MODEL_URLS["SwinTransformer_base_patch4_window7_224"],
use_ssld=use_ssld,
use_imagenet22k_pretrained=use_imagenet22k_pretrained,
use_imagenet22kto1k_pretrained=use_imagenet22kto1k_pretrained)
model=model,
model_url=MODEL_URLS_SOLIDER["SwinTransformer_base_patch4_window7_224_SOLIDER"],
**kwargs)
return model
@@ -32,6 +32,7 @@ AMP:
Arch:
name: SwinTransformer_base_patch4_window12_384
class_num: 1000
pretrained: True

# loss function config for traing/eval process
Loss:
@@ -32,7 +32,6 @@ AMP:
Arch:
name: SwinTransformer_base_patch4_window7_224
class_num: 1000
pretrained: True

# loss function config for traing/eval process
Loss:
@@ -32,7 +32,7 @@ AMP:
Arch:
name: SwinTransformer_large_patch4_window12_384
class_num: 1000

# loss function config for traing/eval process
Loss:
Train:
@@ -32,7 +32,7 @@ AMP:
Arch:
name: SwinTransformer_large_patch4_window7_224
class_num: 1000

# loss function config for traing/eval process
Loss:
Train:
@@ -32,7 +32,7 @@ AMP:
Arch:
name: SwinTransformer_small_patch4_window7_224
class_num: 1000

# loss function config for traing/eval process
Loss:
Train:
@@ -32,7 +32,7 @@ AMP:
Arch:
name: SwinTransformer_tiny_patch4_window7_224
class_num: 1000

# loss function config for traing/eval process
Loss:
Train:
