"""Helper utility functions: seeding, accelerator introspection, formatting."""

import os
import time
from typing import Any, Dict

import torch

# Divisor used to report byte counts in decimal gigabytes.
_BYTES_PER_GB = 1e9


def _npu_available() -> bool:
    """Return True when ``torch.npu`` exists and reports an available device."""
    # ``torch.npu`` is not part of stock PyTorch, hence the hasattr guard.
    return hasattr(torch, "npu") and torch.npu.is_available()


def _cuda_device_info(index: int) -> Dict[str, Any]:
    """Collect name and memory figures (in GB) for one CUDA device."""
    return {
        "name": torch.cuda.get_device_name(index),
        "memory_allocated": torch.cuda.memory_allocated(index) / _BYTES_PER_GB,
        "memory_reserved": torch.cuda.memory_reserved(index) / _BYTES_PER_GB,
        "max_memory": (
            torch.cuda.get_device_properties(index).total_memory / _BYTES_PER_GB
        ),
    }


def _npu_device_info(index: int) -> Dict[str, Any]:
    """Collect name and memory figures (in GB) for one NPU device.

    The NPU backend is queried through the same calls as ``torch.cuda``. If
    the installed backend lacks one of them, or the query fails at runtime,
    the placeholder entry (generic name, zeroed memory) is returned instead
    so that callers always receive the same keys.
    """
    try:
        return {
            "name": torch.npu.get_device_name(index),
            "memory_allocated": torch.npu.memory_allocated(index) / _BYTES_PER_GB,
            "memory_reserved": torch.npu.memory_reserved(index) / _BYTES_PER_GB,
            "max_memory": (
                torch.npu.get_device_properties(index).total_memory / _BYTES_PER_GB
            ),
        }
    except (AttributeError, RuntimeError):
        # Best effort: keep the entry shape stable even without real numbers.
        return {
            "name": f"NPU {index}",
            "memory_allocated": 0,
            "memory_reserved": 0,
            "max_memory": 0,
        }


def setup_environment(seed: int = 42):
    """Seed the random number generators and set process environment variables.

    Seeds Python's ``random``, NumPy and PyTorch (plus every CUDA / NPU device
    when such a backend is available), then sets ``TOKENIZERS_PARALLELISM``
    and ``PYTHONHASHSEED`` in ``os.environ`` and prints a confirmation.

    Note:
        ``PYTHONHASHSEED`` is read by the interpreter at start-up, so setting
        it here does not change hashing in the current process; it is only
        inherited by child processes started afterwards.

    Args:
        seed: Seed value applied to every generator.
    """
    import random

    import numpy as np

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    if _npu_available():
        torch.npu.manual_seed_all(seed)

    # These variables are not accelerator specific.
    # NOTE(review): TOKENIZERS_PARALLELISM is presumably consumed by the
    # Hugging Face tokenizers library; confirm against the callers.
    os.environ["TOKENIZERS_PARALLELISM"] = "false"
    os.environ["PYTHONHASHSEED"] = str(seed)

    print(f"环境设置完成,随机种子:{seed}")


def get_gpu_info() -> Dict[str, Any]:
    """Describe the available accelerator devices.

    CUDA is checked first; the NPU backend is consulted only when CUDA is not
    available.

    Returns:
        When an accelerator is found, a dictionary with the keys
        ``available`` (True), ``device_type`` (``"cuda"`` or ``"npu"``),
        ``device_count`` and ``devices``. Each entry of ``devices`` holds
        ``name``, ``memory_allocated``, ``memory_reserved`` and
        ``max_memory``, with the memory values expressed in GB (bytes / 1e9).
        NPU entries fall back to a generic name and zeroed memory values when
        the backend cannot be queried.
        When no accelerator is found, ``{"available": False}``.
    """
    if torch.cuda.is_available():
        count = torch.cuda.device_count()
        return {
            "available": True,
            "device_type": "cuda",
            "device_count": count,
            "devices": [_cuda_device_info(i) for i in range(count)],
        }

    if _npu_available():
        count = torch.npu.device_count()
        return {
            "available": True,
            "device_type": "npu",
            "device_count": count,
            "devices": [_npu_device_info(i) for i in range(count)],
        }

    return {"available": False}


def count_parameters(model) -> Dict[str, int]:
    """Count the parameters of a model.

    Args:
        model: Object exposing ``parameters()``, whose items provide
            ``numel()`` and ``requires_grad``.

    Returns:
        Dictionary with ``total``, ``trainable`` (``requires_grad`` is true)
        and ``frozen`` (``total - trainable``) element counts.
    """
    total_params = 0
    trainable_params = 0
    # Single pass over the parameters instead of iterating them twice.
    for param in model.parameters():
        size = param.numel()
        total_params += size
        if param.requires_grad:
            trainable_params += size

    return {
        "total": total_params,
        "trainable": trainable_params,
        "frozen": total_params - trainable_params,
    }


def format_time(seconds: float) -> str:
    """Format a duration as a short human-readable string.

    Args:
        seconds: Duration in seconds.

    Returns:
        The duration with two decimals, in seconds (``s``) below one minute,
        in minutes (``m``) below one hour, and in hours (``h``) otherwise.
    """
    if seconds < 60:
        return f"{seconds:.2f}s"
    if seconds < 3600:
        return f"{seconds / 60:.2f}m"
    return f"{seconds / 3600:.2f}h"


def print_memory_usage():
    """Print the allocated and reserved accelerator memory in GB.

    CUDA statistics are printed when CUDA is available; otherwise NPU
    statistics are printed when an NPU backend is available. If the NPU
    backend cannot report memory figures, a plain header line is printed
    instead. Nothing is printed when neither backend is available.
    """
    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated() / _BYTES_PER_GB
        reserved = torch.cuda.memory_reserved() / _BYTES_PER_GB
        print(f"GPU 内存 - 已分配:{allocated:.2f}GB, 已保留:{reserved:.2f}GB")
    elif _npu_available():
        try:
            allocated = torch.npu.memory_allocated() / _BYTES_PER_GB
            reserved = torch.npu.memory_reserved() / _BYTES_PER_GB
        except (AttributeError, RuntimeError):
            # Backend without memory statistics: keep the plain header line.
            print("NPU 内存统计(华为升腾)")
        else:
            print(f"NPU 内存 - 已分配:{allocated:.2f}GB, 已保留:{reserved:.2f}GB")