[ASR] add squeezeformer model #2755

Merged 9 commits, Mar 15, 2023
Changes from 1 commit
1 change: 0 additions & 1 deletion examples/aishell/asr1/conf/chunk_squeezeformer.yaml
@@ -21,7 +21,6 @@ encoder_conf:
     normalize_before: false
     activation_type: 'swish'
     pos_enc_layer_type: 'rel_pos'
-    do_rel_shift: false
Collaborator: I suggest keeping do_rel_shift.

     time_reduction_layer_type: 'stream'
     causal: true
     use_dynamic_chunk: true
2 changes: 1 addition & 1 deletion examples/aishell/asr1/conf/squeezeformer.yaml
@@ -21,7 +21,7 @@ encoder_conf:
     normalize_before: false
     activation_type: 'swish'
     pos_enc_layer_type: 'rel_pos'
-    time_reduction_layer_type: 'conv2d'
+    time_reduction_layer_type: 'conv1d'

 # decoder related
 decoder: transformer
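For context on the change above: the 'conv1d' setting selects a 1-D depthwise-convolution time-reduction layer, which halves the encoder frame rate along the time axis. A minimal sketch of such a layer in Paddle, with the layer name, kernel size, and stride assumed for illustration (not the exact PaddleSpeech implementation):

import paddle
import paddle.nn as nn

class TimeReductionLayer1D(nn.Layer):
    """Sketch: halve the time axis with a strided depthwise Conv1D."""

    def __init__(self, channel: int, kernel_size: int = 5, stride: int = 2):
        super().__init__()
        self.conv = nn.Conv1D(
            in_channels=channel,
            out_channels=channel,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=channel)  # depthwise: one filter per channel

    def forward(self, x):
        # x: (batch, time, channel) -> (batch, ~time/2, channel)
        x = x.transpose([0, 2, 1])  # Conv1D expects (batch, channel, time)
        x = self.conv(x)
        return x.transpose([0, 2, 1])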
5 changes: 1 addition & 4 deletions paddlespeech/s2t/modules/attention.py
@@ -204,7 +204,6 @@ def __init__(self,
                  n_head,
                  n_feat,
                  dropout_rate,
-                 do_rel_shift=False,
                  adaptive_scale=False,
                  init_weights=False):
         """Construct an RelPositionMultiHeadedAttention object.
@@ -229,7 +228,6 @@ def __init__(self,
         pos_bias_v = self.create_parameter(
             (self.h, self.d_k), default_initializer=I.XavierUniform())
         self.add_parameter('pos_bias_v', pos_bias_v)
-        self.do_rel_shift = do_rel_shift
         self.adaptive_scale = adaptive_scale
zh794390558 marked this conversation as resolved.
         if self.adaptive_scale:
             ada_scale = self.create_parameter(
@@ -369,8 +367,7 @@ def forward(self,
         matrix_bd = paddle.matmul(q_with_bias_v, p, transpose_y=True)
         # Remove rel_shift since it is useless in speech recognition,
         # and it requires special attention for streaming.
-        if self.do_rel_shift:
-            matrix_bd = self.rel_shift(matrix_bd)
+        # matrix_bd = self.rel_shift(matrix_bd)

         scores = (matrix_ac + matrix_bd) / math.sqrt(
             self.d_k)  # (batch, head, time1, time2)
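For reference, the rel_shift call being commented out above is the Transformer-XL-style relative shift: it realigns the query-times-position score matrix so that column j of row i corresponds to relative offset j - i. A minimal standalone sketch, assuming the input has the same shape as matrix_bd, i.e. (batch, head, time1, time2) (not the exact method body from this file):

import paddle

def rel_shift(x):
    # x: (batch, head, time1, time2) attention scores against
    # relative positional embeddings.
    b, h, t1, t2 = x.shape
    # Prepend a zero column, then reshape so every row ends up shifted
    # one step further than the row above it; finally drop the padding.
    zero_pad = paddle.zeros([b, h, t1, 1], dtype=x.dtype)
    x_padded = paddle.concat([zero_pad, x], axis=-1)   # (b, h, t1, t2 + 1)
    x_padded = x_padded.reshape([b, h, t2 + 1, t1])
    return x_padded[:, :, 1:, :].reshape([b, h, t1, t2])

Skipping the shift, as this commit does, simplifies streaming inference at the cost of exact relative alignment, which the in-code comment argues does not matter for speech recognition.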
5 changes: 1 addition & 4 deletions paddlespeech/s2t/modules/encoder.py
@@ -515,7 +515,6 @@ def __init__(self,
                  input_dropout_rate: float=0.1,
                  pos_enc_layer_type: str="rel_pos",
                  time_reduction_layer_type: str="conv1d",
-                 do_rel_shift: bool=True,
                  feed_forward_dropout_rate: float=0.1,
                  attention_dropout_rate: float=0.1,
                  cnn_module_kernel: int=31,
@@ -549,8 +548,6 @@ def __init__(self,
             input_dropout_rate (float): Dropout rate of input projection layer.
             pos_enc_layer_type (str): Self attention type.
             time_reduction_layer_type (str): Conv1d or Conv2d reduction layer.
-            do_rel_shift (bool): Whether to do relative shift
-                operation on rel-attention module.
             cnn_module_kernel (int): Kernel size of CNN module.
             activation_type (str): Encoder activation function type.
             cnn_module_kernel (int): Kernel size of convolution module.
@@ -590,7 +587,7 @@ def __init__(self,
         else:
             encoder_selfattn_layer = RelPositionMultiHeadedAttention
             encoder_selfattn_layer_args = (attention_heads, encoder_dim,
-                                           attention_dropout_rate, do_rel_shift,
+                                           attention_dropout_rate,
                                            adaptive_scale, init_weights)

         # feed-forward module definition
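Note that encoder_selfattn_layer_args is later unpacked positionally when the attention layer is instantiated, so dropping do_rel_shift from the tuple here must stay in sync with the new RelPositionMultiHeadedAttention.__init__ signature in attention.py. Roughly (the construction site is assumed, not shown in this diff):

# Positional unpacking: argument order must match __init__ exactly.
self_attn = encoder_selfattn_layer(*encoder_selfattn_layer_args)
# i.e. RelPositionMultiHeadedAttention(attention_heads, encoder_dim,
#                                      attention_dropout_rate,
#                                      adaptive_scale, init_weights)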