"""Example configuration file.

Module-level constants describing a LoRA fine-tuning run for a Qwen3.5
checkpoint. Each group of settings is introduced by its own comment.
"""
# Qwen3.5 fine-tuning configuration.
# The checkpoint selected here is the 0.5B variant; switch to
# "Qwen/Qwen3.5-0.8B" once that checkpoint is available.
# NOTE(review): the original header said "0.8B" while model_name (and
# output_dir elsewhere in this file) say 0.5B; confirm which is intended.
model_name = "Qwen/Qwen3.5-0.5B"
# Dataset configuration.
# dataset_path is a relative path; what it is resolved against depends on the
# code that loads this config.
# The *_column values are presumably the per-record field names in that JSON
# file; confirm against the data loader.
dataset_path = "data/sample_dataset.json"
instruction_column = "instruction"
input_column = "input"
output_column = "output"
# LoRA configuration.
# NOTE(review): the names match PEFT's LoraConfig arguments (r, lora_alpha,
# lora_dropout, target_modules); presumably they are forwarded there, so
# confirm against the training script.
lora_r = 16
lora_alpha = 32
lora_dropout = 0.05
# Module names the adapter is applied to, one per line so additions and
# removals diff cleanly.
target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]
# Training configuration.
# NOTE(review): with a per-device batch of 1 and 4 accumulation steps, each
# optimizer step presumably covers 1 * 4 = 4 samples per device. This assumes
# standard gradient-accumulation semantics in the trainer; confirm.
per_device_train_batch_size = 1
gradient_accumulation_steps = 4
learning_rate = 2e-4
num_train_epochs = 3
# Presumably measured in tokens; confirm how the consumer truncates or pads.
max_seq_length = 512
# Optimizer configuration.
# warmup_ratio is presumably the fraction of total training steps spent
# warming up the learning rate; confirm against the scheduler in use.
warmup_ratio = 0.03
weight_decay = 0.01
lr_scheduler_type = "cosine"
# Quantization configuration.
# NOTE(review): the bnb_* names mirror bitsandbytes / BitsAndBytesConfig
# options; presumably they are forwarded there, so confirm in the
# model-loading code.
use_4bit = True
# Kept as a string; the consumer is expected to map it to a dtype object.
bnb_4bit_compute_dtype = "float16"
bnb_4bit_quant_type = "nf4"
use_nested_quant = False
# Output configuration.
output_dir = "./outputs/qwen3.5-0.5b-finetuned"
# Intervals are presumably counted in optimizer steps; confirm with the trainer.
logging_steps = 10
save_steps = 50
# NOTE(review): recent transformers releases renamed the TrainingArguments
# field `evaluation_strategy` to `eval_strategy`. If this value is forwarded
# by name, check the installed version before upgrading.
evaluation_strategy = "no"
# Miscellaneous configuration.
seed = 42
# NOTE(review): fp16 is enabled here and bnb_4bit_compute_dtype elsewhere in
# this file is "float16"; keep the two in agreement if either changes.
fp16 = True