import base64
import random
import re
import time
import traceback

import pandas as pd
import requests
from bs4 import BeautifulSoup
from openai import OpenAI

# ---------------------------------------------------------------------------
# Disabled experiment: the same extraction task sent through Ollama's
# OpenAI-compatible endpoint, with token/latency statistics. Kept for
# reference because it shows the ChatML prompt layout and the answer-prefix
# trick that infer_ollama_api() below expects from its callers.
# ---------------------------------------------------------------------------
# client = OpenAI(base_url="http://192.168.2.103:11434/v1", api_key="ollama")
#
# instruction = '提取以上招投标文档的关键信息,只输出有值的,其中中标人等放在多标段信息中形成数组,' \
#               '产品参数等放在产品信息中形成数组' \
#               '金额均以元为单位,时间格式为YYYY-MM-DD HH:MM:SS,无多余内容' \
#               '直接得到要素提取Json:'
#
# answer_prefix = '{"项目名称":'
#
# _text = "树村220千伏变电站110千伏送出工程-拆改工程(设计)中标候选人公示 招标项目名称: 树村220千伏变电站110千伏送出工程-拆改工程(设计) 招标编号: 91110108777085234D 招标项目编号: Z1101000311004312001 所属行业: 其他 所属地区: 北京市-海淀区 开始时间: 2026-03-06 结束时间: 2026-03-10 招标单位: 北京海融达投资建设有限公司 招标代理: 天行健项目管理咨询(北京)有限公司 发布时间: 2026-03-06 17:17:12.0 公示内容 公告文件.pdf"
#
# prompt = f"<|im_start|>user\n{_text}\n{instruction}<|im_end|>\n<|im_start|>assistant\n{answer_prefix}"
#
#
# start_time = time.time()
# response = client.chat.completions.create(
#     model="bidding-qwen-0.8B-lora-16K",
#     messages=[{"role": "user", "content": prompt}],
#     temperature=0,
#     stop=["<|im_end|>", "<|endoftext|>", "###"],
#     max_tokens=4096,
# )
# end_time = time.time()
#
# answer = response.choices[0].message.content
# answer = answer_prefix + answer
#
# # Collect metrics
# usage = response.usage
# prompt_tokens = usage.prompt_tokens
# completion_tokens = usage.completion_tokens
# total_duration = end_time - start_time
# tps = completion_tokens / total_duration if total_duration > 0 else 0
#
# print(f"回答: {answer}")
# print("-" * 30)
# print(f"指标统计:")
# print(f" - 输入 Tokens: {prompt_tokens}")
# print(f" - 输出 Tokens: {completion_tokens}")
# print(f" - 总耗时: {total_duration:.2f} s")
# print(f" - 推理速度: {tps:.2f} tokens/s")

# Ollama endpoints on the inference host. Only url_docker is used by
# infer_ollama_api() by default; the others are alternates.
url = "http://192.168.2.103:11434/api/generate"
url2 = "http://192.168.2.103:11435/api/generate"
url_docker = "http://192.168.2.103:11436/api/generate"
url_docker_chat = "http://192.168.2.103:11436/api/chat"


def infer_ollama_api(prompt, answer_prefix, model_name=None, temperature=0.05,
                     *, api_url=None, timeout=600):
    """Run one non-streaming completion against Ollama's generate endpoint.

    The request is sent with ``raw=True``, so the prompt is passed to the
    model as-is: the caller must supply the complete chat markup itself and
    end it with ``answer_prefix`` so the model continues from that prefix
    (see the disabled experiment above for the layout).

    Args:
        prompt: Fully formatted prompt text, already ending with
            ``answer_prefix``.
        answer_prefix: Text the model was primed with; it is prepended to the
            generated continuation to rebuild the full answer.
        model_name: Ollama model tag. Defaults to
            ``"bidding-qwen-0.8B-lora:latest"`` when ``None``.
        temperature: Sampling temperature forwarded in ``options``.
        api_url: Generate endpoint to call. Defaults to ``url_docker``.
        timeout: Seconds passed to ``requests.post``; may also be a
            ``(connect, read)`` tuple or ``None`` to wait indefinitely.

    Returns:
        ``answer_prefix`` concatenated with the model's ``"response"`` text.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or a non-2xx HTTP status.
        KeyError: If the JSON body has no ``"response"`` field.
    """
    now_url = url_docker if api_url is None else api_url
    if model_name is None:
        # Other tags tried during development: 'deepseek-r1:1.5b',
        # "bidding-qwen-2B-lora-16k:latest", "qwen3.5:0.8b".
        model_name = "bidding-qwen-0.8B-lora:latest"
    print('infer_ollama_api', now_url, model_name)

    payload = {
        "model": model_name,
        "prompt": prompt,
        "stream": False,
        # Send the prompt untouched; the caller already built the chat markup.
        "raw": True,
        "options": {
            "num_ctx": 1024 * 16,
            "temperature": temperature,
            "num_predict": 4096,
            "repeat_penalty": 1.18,
            # -1 is documented by Ollama as "look back over num_ctx tokens".
            "repeat_last_n": -1,
            "stop": ["<|im_end|>", "<|endoftext|>"],
            # Fixed seed so repeated calls sample reproducibly.
            "seed": 42,
            # Sample from the 50 most likely tokens (this is NOT greedy
            # decoding; that would require top_k=1).
            "top_k": 50,
            "top_p": 0.99,
        },
    }

    # requests never times out on its own; bound the wait so a stalled
    # server cannot hang the caller forever.
    response = requests.post(now_url, json=payload, timeout=timeout)
    # Surface HTTP failures directly instead of a KeyError further down.
    response.raise_for_status()
    result = response.json()

    # Rebuild the full answer: the primed prefix plus the generated tail.
    return answer_prefix + result["response"]