Yolov4 with Efficientnet b0-b7 Backbone

shihyung
13 min read · Nov 10, 2020

--

* 目的:

這邊嘗試著以 EfficientNet b0-b7 將 Yolov4 的 backbone 做替換來看看會有怎樣的訓練趨勢。

* Backbone 替換

* yaml 檔修改

原始的 Yolov4_L yaml 檔案的 backbone:

修改後的 Yolov4_L_Efficientnet_b0 yaml:

修改後的 Yolov4_L_Efficientnet_b1 yaml:

修改後的 Yolov4_L_Efficientnet_b2 yaml:

修改後的 Yolov4_L_Efficientnet_b3 yaml:

修改後的 Yolov4_L_Efficientnet_b4 yaml:

修改後的 Yolov4_L_Efficientnet_b5 yaml:

修改後的 Yolov4_L_Efficientnet_b6 yaml:

修改後的 Yolov4_L_Efficientnet_b7 yaml:

* 程式修改

yolo.py, parse_model() 增加:

eff_n=n
elif m is CBS:
c1=ch[f if f<0 else f+1]
c2=args[0]
args=[c1,c2,*args[1:]]
elif m is effLayer:
c1=ch[f if f<0 else f+1]
c2=args[0]
args=[c1,c2,eff_n,*args[1:]]

common.py 增加:

class CBS(nn.Module):
    """Convolution -> BatchNorm2d -> Swish block.

    Args:
        c1: number of input channels.
        c2: number of output channels.
        k: convolution kernel size.
        s: convolution stride.
        s_count: stride counter; when greater than 1 the nominal image size
            is divided by ``2 ** s_count`` before the conv class is selected.
        bnum: model key, one of ``'b0'`` .. ``'b7'``. It indexes the nominal
            image-size table, so any other value (including the default
            ``'b'``) raises ``KeyError``.

    NOTE(review): ``get_same_padding_conv2d`` and ``MemoryEfficientSwish`` are
    not defined in this snippet; presumably they come from
    ``efficientnet_pytorch.utils`` -- confirm against the file's imports.
    """

    def __init__(self, c1, c2, k, s, s_count=0, bnum='b'):
        super().__init__()
        image_sizes = {
            'b0': 224, 'b1': 240, 'b2': 260, 'b3': 300,
            'b4': 380, 'b5': 456, 'b6': 528, 'b7': 600,
        }
        nominal_size = image_sizes[bnum]
        # The size is only scaled down once more than one stride step has
        # been counted; otherwise the nominal size is used unchanged.
        image_size = int(nominal_size / (2 ** s_count)) if s_count > 1 else nominal_size

        conv_cls = get_same_padding_conv2d(image_size=image_size)
        self.conv = conv_cls(c1, c2, kernel_size=k, stride=s, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.swish = MemoryEfficientSwish()

    def forward(self, x):
        """Apply conv, batch norm and swish in sequence and return the result."""
        out = self.conv(x)
        out = self.bn(out)
        return self.swish(out)

class effBlock(nn.Module):
    """Inverted-bottleneck block: expand -> depthwise -> (SE) -> project.

    Args:
        c1: number of input channels.
        c2: number of output channels.
        k: depthwise convolution kernel size.
        s: depthwise convolution stride.
        exp_r: expansion ratio; the hidden width is ``c1 * exp_r``. The
            expansion conv is only created when this is not 1.
        se_r: squeeze-and-excitation ratio; SE is enabled only when it is
            not ``None`` and ``0 < se_r <= 1``.
        image_size: image size handed to ``get_same_padding_conv2d`` when
            selecting the conv class for each stage.

    NOTE(review): ``get_same_padding_conv2d``, ``calculate_output_image_size``,
    ``MemoryEfficientSwish`` and ``drop_connect`` are defined outside this
    snippet -- presumably efficientnet_pytorch utilities; confirm.
    """

    def __init__(self, c1, c2, k=1, s=1, exp_r=1, se_r=None, image_size=0):
        super().__init__()
        self.expansion_ratio = exp_r
        self.has_se = se_r is not None and 0 < se_r <= 1
        self.ch_in = c1
        self.ch_tmp = c1 * exp_r
        self.ch_out = c2
        self.stride = s

        hidden = self.ch_tmp

        # Expansion phase (inverted bottleneck), skipped when the ratio is 1.
        if self.expansion_ratio != 1:
            expand_cls = get_same_padding_conv2d(image_size=image_size)
            self._expand_conv = expand_cls(
                in_channels=self.ch_in, out_channels=hidden, kernel_size=1, bias=False
            )
            self._bn0 = nn.BatchNorm2d(num_features=hidden)

        # Depthwise convolution phase: one group per hidden channel.
        depthwise_cls = get_same_padding_conv2d(image_size=image_size)
        self._depthwise_conv = depthwise_cls(
            in_channels=hidden,
            out_channels=hidden,
            groups=hidden,
            kernel_size=k,
            stride=s,
            bias=False,
        )
        self._bn1 = nn.BatchNorm2d(num_features=hidden)
        # Later stages are configured with the size after the depthwise stride.
        image_size = calculate_output_image_size(image_size, s)

        # Optional squeeze-and-excitation; it is fed a 1x1 pooled map.
        if self.has_se:
            se_cls = get_same_padding_conv2d(image_size=(1, 1))
            # The squeezed width is derived from the block's *input* channels.
            squeezed = max(1, int(self.ch_in * se_r))
            self._se_reduce = se_cls(in_channels=hidden, out_channels=squeezed, kernel_size=1)
            self._se_expand = se_cls(in_channels=squeezed, out_channels=hidden, kernel_size=1)

        # Pointwise projection phase.
        project_cls = get_same_padding_conv2d(image_size=image_size)
        self._project_conv = project_cls(
            in_channels=hidden, out_channels=self.ch_out, kernel_size=1, bias=False
        )
        self._bn2 = nn.BatchNorm2d(num_features=self.ch_out)
        self._swish = MemoryEfficientSwish()

    def forward(self, inputs, drop_connect_rate=None):
        """Run the block on ``inputs``.

        Args:
            inputs: input tensor.
            drop_connect_rate: when truthy, ``drop_connect`` is applied to the
                block output before the residual add. It only has an effect
                when the skip connection is active.

        Returns:
            The projected tensor, with ``inputs`` added when the stride is 1
            and the input and output channel counts match.
        """
        x = inputs
        if self.expansion_ratio != 1:
            x = self._swish(self._bn0(self._expand_conv(inputs)))

        x = self._swish(self._bn1(self._depthwise_conv(x)))

        # Squeeze and excitation: gate channels with a sigmoid of pooled features.
        if self.has_se:
            gate = F.adaptive_avg_pool2d(x, 1)
            gate = self._se_expand(self._swish(self._se_reduce(gate)))
            x = torch.sigmoid(gate) * x

        # Pointwise projection (no activation afterwards).
        x = self._bn2(self._project_conv(x))

        # A residual connection is only possible when the shapes line up.
        if self.stride != 1 or self.ch_in != self.ch_out:
            return x

        # Skip connection combined with drop connect yields stochastic depth.
        if drop_connect_rate:
            x = drop_connect(x, p=drop_connect_rate)
        return x + inputs

class effLayer(nn.Module):
    """A stack of ``n`` ``effBlock`` modules forming one stage.

    Args:
        c1: input channels of the first block.
        c2: output channels of every block.
        n: number of blocks in the stage.
        k: depthwise kernel size used by every block.
        s: stride of the first block; the remaining blocks use stride 1.
        s_count: stride counter used to derive the stage's image size from
            the model's nominal image size.
        exp_r: expansion ratio forwarded to every block.
        se_r: squeeze-and-excitation ratio forwarded to every block.
        bnum: block-number offset added to the in-stage index when scaling
            the drop-connect rate in ``forward``.
        md: model key, one of ``'b0'`` .. ``'b7'``. It indexes the parameter
            table, so any other value (including the default ``'b'``) raises
            ``KeyError``.

    NOTE(review): ``GlobalParams`` and ``calculate_output_image_size`` are
    defined outside this snippet -- presumably efficientnet_pytorch utilities;
    confirm.
    """

    def __init__(self, c1, c2, n=1, k=1, s=1, s_count=0, exp_r=1, se_r=1, bnum=0, md='b'):
        super().__init__()
        # Each entry: (width_coefficient, depth_coefficient, image_size, dropout_rate)
        params_dict = {
            'b0': (1.0, 1.0, 224, 0.2),
            'b1': (1.0, 1.1, 240, 0.2),
            'b2': (1.1, 1.2, 260, 0.3),
            'b3': (1.2, 1.4, 300, 0.3),
            'b4': (1.4, 1.8, 380, 0.4),
            'b5': (1.6, 2.2, 456, 0.4),
            'b6': (1.8, 2.6, 528, 0.5),
            'b7': (2.0, 3.1, 600, 0.5),
        }
        width_coef, depth_coef, image_size, dropout_rate = params_dict[md]

        self.global_params = GlobalParams(
            width_coefficient=width_coef,
            depth_coefficient=depth_coef,
            image_size=image_size,
            dropout_rate=dropout_rate,
            num_classes=1000,
            batch_norm_momentum=0.99,
            batch_norm_epsilon=1e-3,
            drop_connect_rate=0.2,
            depth_divisor=8,
            min_depth=None,
            include_top=True,
        )
        self.block_number = bnum

        # A stride-2 stage uses one fewer power of two when deriving the
        # image size given to its first block.
        scale_exp = s_count - 1 if s == 2 else s_count
        image_size = calculate_output_image_size(self.global_params.image_size, 2 ** scale_exp)

        # Only the first block changes channel count and applies the stride.
        head = effBlock(c1, c2, k, s, exp_r, se_r, image_size)
        image_size = calculate_output_image_size(image_size, s)
        tail = [
            effBlock(c2, c2, k, 1, exp_r, se_r, image_size)
            for _ in range(n - 1)
        ]
        self.blocks = nn.ModuleList([head, *tail])

    def forward(self, x):
        """Run ``x`` through every block, scaling the drop-connect rate per block."""
        base_rate = self.global_params.drop_connect_rate
        offset = self.block_number
        for idx, block in enumerate(self.blocks):
            rate = base_rate
            if rate:
                # Scale by the block's position; the original author's note
                # says the divisor 16 is EfficientNet's total block count.
                rate = rate * (float(idx + offset) / 16)
            x = block(x, drop_connect_rate=rate)
        return x

* parameter 變化量

原始的 Yolov4_S:

原始的 Yolov4_L:

修改後的 Yolov4_Efficientnet_b0:

修改後的 Yolov4_Efficientnet_b1:

修改後的 Yolov4_Efficientnet_b2:

修改後的 Yolov4_Efficientnet_b3:

修改後的 Yolov4_Efficientnet_b4:

修改後的 Yolov4_Efficientnet_b5:

修改後的 Yolov4_Efficientnet_b6:

修改後的 Yolov4_Efficientnet_b7:

* 測試結果

因為 COCO 圖片集太多,為實驗方便,此處依舊僅取其車輛部分 names: ['motorcycle', 'car', 'bus', 'truck']。由於機器資源有限,所以只測試了 b0/b1,測試結果如下:

--

--

No responses yet