llm/config/mixtral/dpo_argument.json

{
  "model_name_or_path": "mistralai/Mixtral-8x7B-Instruct-v0.1",
  "train_dataset_path": "./data/train.jsonl",
  "dev_dataset_path": "./data/dev.jsonl",
  "output_dir": "./checkpoints/dpo_ckpts",
  "per_device_train_batch_size": 1,
  "gradient_accumulation_steps": 1,
  "per_device_eval_batch_size": 1,
  "num_train_epochs": 1,
  "max_steps": 100,
  "learning_rate": 1e-06,
  "warmup_steps": 10,
  "logging_steps": 1,
  "evaluation_strategy": "steps",
  "save_strategy": "steps",
  "eval_steps": 100,
  "save_steps": 500,
  "max_seq_len": 4096,
  "max_prompt_len": 2048,
  "bf16": true,
  "fp16_opt_level": "O2",
  "do_train": true,
  "do_eval": true,
  "disable_tqdm": true,
  "load_best_model_at_end": true,
  "tensor_parallel_degree": 8,
  "sharding": "stage2",
  "use_flash_attention": true,
  "recompute": false,
  "recompute_granularity": "full",
  "beta": 0.1,
  "benchmark": false,
  "loss_type": "sigmoid",
  "label_smoothing": 0.0,
  "unified_checkpoint": true,
  "autotuner_benchmark":false,
  "lazy": false,
  "seed":42,
  "sft_loss_ratio": 0,
  "pref_loss_ratio": 1.0
}