# 克隆项目(请将 <仓库地址> 替换为实际的仓库 URL)
git clone <仓库地址>
cd FineTuneX
# 安装依赖
pip install -r requirements.txt
# 或者以开发模式安装
pip install -e .
python -c "import torch; print(f'PyTorch: {torch.__version__}')"
python -c "import transformers; print(f'Transformers: {transformers.__version__}')"
# 微调 Qwen3.5 模型
python examples/qwen3.5_0.8b_finetune.py
# 数据预处理
python scripts/preprocess_data.py --input data.json --output data_processed.json
# 模型推理
python scripts/inference.py --model_path ./outputs/model --prompt "你好"
# 启动 API 服务
python scripts/start_api.py --port 8000
FineTuneX 支持标准的指令微调数据格式:
[
{
"instruction": "指令文本",
"input": "输入文本(可选)",
"output": "期望的输出文本"
}
]
项目已包含示例数据集:data/sample_dataset.json
# 转换为 Alpaca 格式
python scripts/preprocess_data.py \
--input your_data.json \
--output alpaca_data.json \
--template alpaca
# 验证数据集
python scripts/preprocess_data.py \
--input your_data.json \
--validate
from finetunex.data import load_dataset, format_dataset
# 加载 JSON 数据
dataset = load_dataset("your_data.json", format="json")
# 加载 CSV 数据
dataset = load_dataset("your_data.csv", format="csv")
# 从 HuggingFace 加载
dataset = load_dataset("squad", split="train")
# 格式化数据集
formatted = format_dataset(
dataset,
instruction_column="question",
output_column="answer",
)
# examples/qwen3.5_0.8b_finetune.py
from finetunex.models import QwenConfig, load_qwen_model
from finetunex.data import load_dataset, InstructionDataset
from finetunex.trainer import FineTuneTrainer
# 1. 配置模型
config = QwenConfig(
model_name="Qwen/Qwen3.5-0.5B",
lora_r=16,
lora_alpha=32,
num_train_epochs=3,
learning_rate=2e-4,
)
# 2. 加载数据
dataset = load_dataset("data/sample_dataset.json")
# 3. 加载模型
model, tokenizer, _ = load_qwen_model(config)
# 4. 创建训练数据集
train_dataset = InstructionDataset(dataset, tokenizer)
# 5. 创建训练器
trainer = FineTuneTrainer(model, tokenizer, config, train_dataset)
# 6. 设置训练
trainer.setup_training(output_dir="./outputs")
# 7. 开始训练
trainer.train()
# 8. 保存模型
trainer.save_model()
config = QwenConfig(
# 模型配置
model_name="Qwen/Qwen3.5-0.5B",
# LoRA 配置
lora_r=16, # LoRA 秩
lora_alpha=32, # LoRA alpha
lora_dropout=0.05, # Dropout 率
target_modules=[ # 目标模块
"q_proj", "v_proj",
"k_proj", "o_proj",
"gate_proj", "up_proj", "down_proj",
],
# 训练配置
per_device_train_batch_size=1,
gradient_accumulation_steps=4,
learning_rate=2e-4,
num_train_epochs=3,
max_seq_length=512,
# 量化配置
use_4bit=True, # 使用 4bit 量化
)
如果显存不足,可以调整以下参数:
config = QwenConfig(
use_4bit=True, # 启用 4bit 量化
per_device_train_batch_size=1, # 减小批次大小
gradient_accumulation_steps=8, # 增加梯度累积
max_seq_length=256, # 减小序列长度
)
# 单次推理
python scripts/inference.py \
--model_path ./outputs/qwen3.5-0.5b-finetuned \
--prompt "请解释什么是机器学习" \
--max_length 512
# 交互模式
python scripts/inference.py \
--model_path ./outputs/qwen3.5-0.5b-finetuned \
--interactive
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch
# 加载模型
tokenizer = AutoTokenizer.from_pretrained("./outputs/model")
base_model = AutoModelForCausalLM.from_pretrained(
"Qwen/Qwen3.5-0.5B",
device_map="auto",
torch_dtype=torch.float16,
)
model = PeftModel.from_pretrained(base_model, "./outputs/model")
# 生成响应
prompt = "请解释什么是机器学习"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
with torch.no_grad():
outputs = model.generate(
**inputs,
max_new_tokens=100,
temperature=0.7,
do_sample=True,
)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
# 默认配置
python scripts/start_api.py
# 自定义端口
python scripts/start_api.py --port 8080
# 禁用自动重载
python scripts/start_api.py --reload false
curl http://localhost:8000/health
curl -X POST http://localhost:8000/api/v1/train \
-H "Content-Type: application/json" \
-d '{
"model_name": "Qwen/Qwen3.5-0.5B",
"dataset_path": "data/sample_dataset.json",
"output_dir": "./outputs",
"num_train_epochs": 3
}'
curl http://localhost:8000/api/v1/train/job_001
curl -X POST http://localhost:8000/api/v1/inference \
-H "Content-Type: application/json" \
-d '{
"model_path": "./outputs/model",
"prompt": "你好",
"max_length": 512
}'
启动服务后访问:http://localhost:8000/docs
# configs/qwen3.5_config.py
# 模型配置
model_name = "Qwen/Qwen3.5-0.5B"
# 数据集配置
dataset_path = "data/sample_dataset.json"
instruction_column = "instruction"
input_column = "input"
output_column = "output"
# LoRA 配置
lora_r = 16
lora_alpha = 32
lora_dropout = 0.05
target_modules = [
"q_proj", "k_proj", "v_proj",
"o_proj", "gate_proj", "up_proj", "down_proj",
]
# 训练配置
per_device_train_batch_size = 1
gradient_accumulation_steps = 4
learning_rate = 2e-4
num_train_epochs = 3
max_seq_length = 512
# 优化器配置
warmup_ratio = 0.03
weight_decay = 0.01
lr_scheduler_type = "cosine"
# 量化配置
use_4bit = True
bnb_4bit_compute_dtype = "float16"
bnb_4bit_quant_type = "nf4"
# 输出配置
output_dir = "./outputs/qwen3.5-0.5b-finetuned"
logging_steps = 10
save_steps = 50
# 其他配置
seed = 42
fp16 = True
问题: CUDA out of memory
解决方案:
- 启用 4bit 量化:use_4bit=True
- 减小批次大小:per_device_train_batch_size=1
- 增加梯度累积:gradient_accumulation_steps=8
- 减小序列长度:max_seq_length=256

问题: 从 HuggingFace 下载模型速度慢
解决方案:
# 使用镜像站
export HF_ENDPOINT=https://hf-mirror.com
python examples/qwen3.5_0.8b_finetune.py
问题: 训练 loss 不下降
解决方案:
- 调整学习率:尝试 1e-4 或 5e-5
- 增加训练轮数:num_train_epochs=5
- 增大 LoRA 秩:lora_r=32

问题: 生成结果不符合预期
解决方案:
- 调整采样温度:temperature=0.5 (更确定) 或 temperature=0.9 (更随机)

问题: pip install 报错
解决方案:
# 创建虚拟环境
python -m venv venv
source venv/bin/activate # Windows: venv\Scripts\activate
# 升级 pip
pip install --upgrade pip
# 重新安装
pip install -r requirements.txt
欢迎贡献代码!请遵循以下步骤:
MIT License