| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- """
- 辅助工具函数
- """
- import os
- import time
- import torch
- from typing import Dict, Any
def setup_environment(seed: int = 42):
    """
    Seed the random number generators and set process environment variables.

    Applies the same seed to Python's ``random`` module, NumPy's global
    generator and PyTorch (``torch.manual_seed`` and
    ``torch.cuda.manual_seed_all``), sets ``TOKENIZERS_PARALLELISM`` to
    ``"false"`` and ``PYTHONHASHSEED`` to the seed, then prints a
    confirmation message.

    Args:
        seed: Seed value applied to every generator.
    """
    import random

    import numpy as np

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    # NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this
    # assignment does not re-seed hashing in the running process; it is only
    # inherited by child processes started afterwards.
    os.environ["PYTHONHASHSEED"] = str(seed)

    print(f"环境设置完成,随机种子:{seed}")
def get_gpu_info() -> Dict[str, Any]:
    """
    Collect availability and memory figures for the visible CUDA devices.

    Returns:
        ``{"available": False}`` when ``torch.cuda.is_available()`` is false.
        Otherwise a dict with ``"available"`` (True), ``"device_count"`` and
        ``"devices"``: a list with one entry per device index containing
        ``"name"``, ``"memory_allocated"``, ``"memory_reserved"`` and
        ``"max_memory"``. Each memory figure is the value reported by torch
        divided by 1e9.
    """
    if not torch.cuda.is_available():
        return {"available": False}

    # Query the device count once and reuse it for the summary and the loop.
    device_count = torch.cuda.device_count()
    devices = [
        {
            "name": torch.cuda.get_device_name(index),
            "memory_allocated": torch.cuda.memory_allocated(index) / 1e9,
            "memory_reserved": torch.cuda.memory_reserved(index) / 1e9,
            "max_memory": torch.cuda.get_device_properties(index).total_memory / 1e9,
        }
        for index in range(device_count)
    ]

    return {
        "available": True,
        "device_count": device_count,
        "devices": devices,
    }
def count_parameters(model) -> Dict[str, int]:
    """
    Count a model's parameters, split into trainable and frozen.

    Args:
        model: Object exposing ``parameters()``, which yields items that have
            ``numel()`` and ``requires_grad`` (e.g. a ``torch.nn.Module``).

    Returns:
        Dict with ``"total"``, ``"trainable"`` (elements of parameters whose
        ``requires_grad`` is true) and ``"frozen"`` (total minus trainable).
    """
    total_params = 0
    trainable_params = 0
    # Single pass: accumulate both counters together instead of walking the
    # parameter list twice.
    for param in model.parameters():
        size = param.numel()
        total_params += size
        if param.requires_grad:
            trainable_params += size

    return {
        "total": total_params,
        "trainable": trainable_params,
        "frozen": total_params - trainable_params,
    }
def format_time(seconds: float) -> str:
    """
    Format a duration in seconds as a short human-readable string.

    The value is shown with two decimals in seconds (``s``), minutes (``m``)
    or hours (``h``). The unit is chosen from the value rounded to those two
    decimals, so the output never reads ``"60.00s"`` or ``"60.00m"``; such
    values roll over to the next unit.

    Args:
        seconds: Duration in seconds.

    Returns:
        The formatted duration, e.g. ``"12.50s"``, ``"1.50m"`` or ``"2.00h"``.
    """
    # Compare the rounded value so e.g. 59.999 is reported as "1.00m",
    # not "60.00s".
    if round(seconds, 2) < 60:
        return f"{seconds:.2f}s"

    minutes = seconds / 60
    if round(minutes, 2) < 60:
        return f"{minutes:.2f}m"

    hours = seconds / 3600
    return f"{hours:.2f}h"
def print_memory_usage():
    """
    Print the allocated and reserved GPU memory.

    Does nothing when ``torch.cuda.is_available()`` is false. Otherwise it
    prints ``torch.cuda.memory_allocated()`` and
    ``torch.cuda.memory_reserved()`` (called without a device argument),
    each divided by 1e9 and shown with two decimals.
    """
    if not torch.cuda.is_available():
        return

    allocated = torch.cuda.memory_allocated() / 1e9
    reserved = torch.cuda.memory_reserved() / 1e9
    print(f"GPU 内存 - 已分配:{allocated:.2f}GB, 已保留:{reserved:.2f}GB")
|