colab_example.py 1.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. """
  2. Colab 笔记本示例
  3. """
  4. # 在 Google Colab 中运行 FineTuneX
  5. # 1. 克隆项目
  6. # !git clone https://github.com/yourusername/FineTuneX.git
  7. # %cd FineTuneX
  8. # 2. 安装依赖
  9. # !pip install -r requirements.txt
  10. # 3. 准备数据
  11. sample_data = """
  12. [
  13. {
  14. "instruction": "请解释什么是机器学习",
  15. "input": "",
  16. "output": "机器学习是人工智能的一个分支,它使计算机能够从数据中学习。"
  17. },
  18. {
  19. "instruction": "将以下中文翻译成英文",
  20. "input": "今天天气很好",
  21. "output": "The weather is very nice today."
  22. }
  23. ]
  24. """
  25. with open("data/sample.json", "w", encoding="utf-8") as f:
  26. f.write(sample_data)
  27. # 4. 运行微调
  28. # !python examples/qwen3.5_0.8b_finetune.py
  29. # 5. 测试推理
  30. """
  31. from transformers import AutoTokenizer
  32. from peft import PeftModel
  33. import torch
  34. tokenizer = AutoTokenizer.from_pretrained("./outputs/qwen3.5-0.5b-finetuned")
  35. model = PeftModel.from_pretrained(
  36. AutoModelForCausalLM.from_pretrained("Qwen/Qwen3.5-0.5B"),
  37. "./outputs/qwen3.5-0.5b-finetuned"
  38. )
  39. model = model.to("cuda")
  40. prompt = "请解释什么是深度学习"
  41. inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
  42. outputs = model.generate(**inputs, max_new_tokens=100)
  43. print(tokenizer.decode(outputs[0], skip_special_tokens=True))
  44. """