"""
FineTuneX project initialization script (init_project.py).
"""
  4. import os
  5. import sys
  6. def create_directories():
  7. """创建必要的目录"""
  8. directories = [
  9. "outputs",
  10. "outputs/checkpoints",
  11. "data",
  12. "data/processed",
  13. "logs",
  14. "models",
  15. ]
  16. for directory in directories:
  17. os.makedirs(directory, exist_ok=True)
  18. print(f"✓ 创建目录:{directory}")
  19. def create_gitignore():
  20. """创建 .gitignore 文件"""
  21. gitignore_content = """# Python
  22. __pycache__/
  23. *.py[cod]
  24. *$py.class
  25. *.so
  26. .Python
  27. build/
  28. develop-eggs/
  29. dist/
  30. downloads/
  31. eggs/
  32. .eggs/
  33. lib/
  34. lib64/
  35. parts/
  36. sdist/
  37. var/
  38. wheels/
  39. *.egg-info/
  40. .installed.cfg
  41. *.egg
  42. # Virtual environments
  43. venv/
  44. env/
  45. ENV/
  46. .venv
  47. # IDE
  48. .vscode/
  49. .idea/
  50. *.swp
  51. *.swo
  52. *~
  53. # Jupyter Notebook
  54. .ipynb_checkpoints
  55. # Project specific
  56. outputs/
  57. *.pth
  58. *.pt
  59. *.bin
  60. *.onnx
  61. logs/
  62. *.log
  63. .DS_Store
  64. Thumbs.db
  65. # Secrets
  66. .env
  67. *.key
  68. *.pem
  69. """
  70. with open(".gitignore", "w", encoding="utf-8") as f:
  71. f.write(gitignore_content)
  72. print("✓ 创建 .gitignore")
  73. def create_env_example():
  74. """创建 .env.example 文件"""
  75. env_content = """# HuggingFace
  76. HF_TOKEN=your_huggingface_token_here
  77. HF_ENDPOINT=https://huggingface.co
  78. # Weights & Biases (可选)
  79. WANDB_API_KEY=your_wandb_key_here
  80. WANDB_PROJECT=finetunex
  81. # API Configuration
  82. API_HOST=0.0.0.0
  83. API_PORT=8000
  84. """
  85. with open(".env.example", "w", encoding="utf-8") as f:
  86. f.write(env_content)
  87. print("✓ 创建 .env.example")
  88. def create_readme():
  89. """创建快速开始 README"""
  90. readme_content = """# FineTuneX 快速开始
  91. ## 1. 安装依赖
  92. ```bash
  93. pip install -r requirements.txt
  94. ```
  95. ## 2. 运行示例
  96. ```bash
  97. python examples/qwen3.5_0.8b_finetune.py
  98. ```
  99. ## 3. 使用自己的数据
  100. 准备数据文件 `data.json`:
  101. ```json
  102. [
  103. {
  104. "instruction": "你的指令",
  105. "input": "输入(可选)",
  106. "output": "期望输出"
  107. }
  108. ]
  109. ```
  110. 修改示例脚本中的数据集路径,然后运行。
  111. ## 4. 推理
  112. ```bash
  113. python scripts/inference.py --model_path ./outputs/qwen3.5-0.5b-finetuned --interactive
  114. ```
  115. ## 5. API 服务
  116. ```bash
  117. python scripts/start_api.py
  118. ```
  119. 访问 http://localhost:8000/docs 查看 API 文档。
  120. ## 更多信息
  121. 查看 [完整文档](docs/usage.md)
  122. """
  123. with open("QUICKSTART.md", "w", encoding="utf-8") as f:
  124. f.write(readme_content)
  125. print("✓ 创建 QUICKSTART.md")
  126. def main():
  127. print("=" * 60)
  128. print("FineTuneX 项目初始化")
  129. print("=" * 60)
  130. create_directories()
  131. create_gitignore()
  132. create_env_example()
  133. create_readme()
  134. print("\n" + "=" * 60)
  135. print("项目初始化完成!")
  136. print("=" * 60)
  137. print("\n下一步:")
  138. print("1. 查看 QUICKSTART.md 了解快速开始")
  139. print("2. 查看 docs/usage.md 了解详细文档")
  140. print("3. 运行:python examples/qwen3.5_0.8b_finetune.py")
  141. print("=" * 60)
  142. if __name__ == "__main__":
  143. main()