diff --git a/egs/librispeech/ASR/zipformer/model.py b/egs/librispeech/ASR/zipformer/model.py
index deebb2a754..c7dbe1e0ad 100644
--- a/egs/librispeech/ASR/zipformer/model.py
+++ b/egs/librispeech/ASR/zipformer/model.py
@@ -22,10 +22,10 @@
 import torch
 import torch.nn as nn
 from encoder_interface import EncoderInterface
+from lhotse.dataset import SpecAugment
 from scaling import ScaledLinear
 
 from icefall.utils import add_sos, make_pad_mask, time_warp
-from lhotse.dataset import SpecAugment
 
 
 class AsrModel(nn.Module):
diff --git a/egs/speech_llm/ASR_LLM/whisper_llm_zh/label_smoothing.py b/egs/speech_llm/ASR_LLM/whisper_llm_zh/label_smoothing.py
new file mode 120000
index 0000000000..e9d239fffb
--- /dev/null
+++ b/egs/speech_llm/ASR_LLM/whisper_llm_zh/label_smoothing.py
@@ -0,0 +1 @@
+../../../librispeech/ASR/conformer_ctc/label_smoothing.py
\ No newline at end of file
diff --git a/icefall/utils.py b/icefall/utils.py
index b0a42cefaa..0682252f95 100644
--- a/icefall/utils.py
+++ b/icefall/utils.py
@@ -2282,13 +2282,12 @@ def time_warp(
     time_warp_factor: Optional[int] = 80,
     supervision_segments: Optional[torch.Tensor] = None,
 ):
-    """Apply time warping on a batch of features
-    """
+    """Apply time warping on a batch of features"""
     if time_warp_factor is None or time_warp_factor < 1:
         return features
-    assert len(features.shape) == 3, (
-        "SpecAugment only supports batches of single-channel feature matrices."
-    )
+    assert (
+        len(features.shape) == 3
+    ), f"SpecAugment only supports batches of single-channel feature matrices. {features.shape}"
     features = features.clone()
     if supervision_segments is None:
         # No supervisions - apply spec augment to full feature matrices.