"""Merge a LoRA adapter into its base model and save the merged checkpoint.

LoRA weights cannot be converted to GGUF directly, so the adapter has to be
merged into the original model first. The script loads everything on the CPU
from local files, merges the adapter, and writes the merged model together
with the base model's tokenizer to ``save_path``.
"""
import os

# Hide all CUDA devices. This is set before torch/transformers/peft are
# imported so that the libraries start up without any visible GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

lora_path = "model/qwen-0.8b-tender-lora/checkpoint-430"
save_path = "model/qwen-merged-model"

# Replace this with the real absolute path of the local snapshot directory
# (the one located earlier with the `find` command).
base_model_path = "/home/user/.cache/huggingface/hub/models--Qwen--Qwen3.5-0.8B/snapshots/2fc06364715b967f1860aea9cf38778875588b17"

print(f"正在尝试从本地路径加载模型: {base_model_path}")

# Fail early with an explicit message. A missing directory could otherwise be
# interpreted as a Hub repository id and surface as a confusing error.
for _label, _path in (
    ("base model", base_model_path),
    ("LoRA adapter", lora_path),
):
    if not os.path.isdir(_path):
        raise FileNotFoundError(f"{_label} directory not found: {_path}")

# Explicitly load the base model from the local directory.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    torch_dtype=torch.float32,
    device_map={"": "cpu"},
    local_files_only=True,  # never go online; a wrong path fails right away
    trust_remote_code=True,
)

# Attach the LoRA adapter, then fold its weights into the base model so the
# result is a plain model without adapter layers.
model = PeftModel.from_pretrained(base_model, lora_path)
merged_model = model.merge_and_unload()
merged_model.save_pretrained(save_path)

# Save the base model's tokenizer next to the merged weights. The same
# offline/trust flags as the model load are used so this step cannot reach
# out to the network either.
tokenizer = AutoTokenizer.from_pretrained(
    base_model_path,
    local_files_only=True,
    trust_remote_code=True,
)
tokenizer.save_pretrained(save_path)

print("模型已成功合并并保存。")