Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
DesmonDay committed Dec 6, 2024
1 parent d815fce commit 1328048
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
7 changes: 4 additions & 3 deletions llm/config/qwen/emb_argument.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"dataset_name_or_path": "./data",
"output_dir": "./checkpoints/sft_ckpts",
"per_device_train_batch_size": 1,
- "gradient_accumulation_steps": 128,
+ "gradient_accumulation_steps": 4,
"per_device_eval_batch_size": 1,
"eval_accumulation_steps": 1,
"max_steps": 2000,
Expand All @@ -15,7 +15,7 @@
"max_query_len": 1024,
"max_passage_len": 2048,
"group_size": 4,
- "bp16": true,
+ "bf16": true,
"fp16_opt_level": "O2",
"do_train": true,
"do_eval": false,
Expand All @@ -30,5 +30,6 @@
"sharding": "stage2",
"zero_padding": false,
"unified_checkpoint": false,
- "use_flash_attention": false
+ "use_flash_attention": true,
+ "amp_custom_black_list": "elementwise_div"
}
5 changes: 5 additions & 0 deletions llm/utils/argument.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from dataclasses import dataclass, field
+ from typing import List, Optional


@dataclass
Expand Down Expand Up @@ -83,3 +84,7 @@ class EmbeddingArgument:
default=True,
metadata={"help": "Whether to share the negatives across all GPUs."},
)
+ embedding_matryoshka_dims: Optional[List[int]] = field(
+ default=None,
+ metadata={"help": "The dims for matryoshka training."},
+ )

0 comments on commit 1328048

Please sign in to comment.