qwen3.5_config.py 977 B

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. """
  2. 配置文件示例
  3. """
  4. # Qwen3.5 0.8B 微调配置
  5. model_name = "Qwen/Qwen3.5-0.5B" # 或 "Qwen/Qwen3.5-0.8B" 当可用时
  6. # 数据集配置
  7. dataset_path = "data/sample_dataset.json"
  8. instruction_column = "instruction"
  9. input_column = "input"
  10. output_column = "output"
  11. # LoRA 配置
  12. lora_r = 16
  13. lora_alpha = 32
  14. lora_dropout = 0.05
  15. target_modules = [
  16. "q_proj",
  17. "k_proj",
  18. "v_proj",
  19. "o_proj",
  20. "gate_proj",
  21. "up_proj",
  22. "down_proj",
  23. ]
  24. # 训练配置
  25. per_device_train_batch_size = 1
  26. gradient_accumulation_steps = 4
  27. learning_rate = 2e-4
  28. num_train_epochs = 3
  29. max_seq_length = 512
  30. # 优化器配置
  31. warmup_ratio = 0.03
  32. weight_decay = 0.01
  33. lr_scheduler_type = "cosine"
  34. # 量化配置
  35. use_4bit = True
  36. bnb_4bit_compute_dtype = "float16"
  37. bnb_4bit_quant_type = "nf4"
  38. use_nested_quant = False
  39. # 输出配置
  40. output_dir = "./outputs/qwen3.5-0.5b-finetuned"
  41. logging_steps = 10
  42. save_steps = 50
  43. evaluation_strategy = "no"
  44. # 其他配置
  45. seed = 42
  46. fp16 = True