| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121 |
- import base64
- import random
- import re
- import traceback
- import pandas as pd
- import requests
- from bs4 import BeautifulSoup
- from openai import OpenAI
- import time
- # client = OpenAI(base_url="http://192.168.2.103:11434/v1", api_key="ollama")
- #
- # instruction = '提取以上招投标文档的关键信息,只输出有值的,其中中标人等放在多标段信息中形成数组,' \
- # '产品参数等放在产品信息中形成数组' \
- # '金额均以元为单位,时间格式为YYYY-MM-DD HH:MM:SS,无多余内容' \
- # '直接得到要素提取Json:'
- #
- # answer_prefix = '{"项目名称":'
- #
- # _text = "树村220千伏变电站110千伏送出工程-拆改工程(设计)中标候选人公示 招标项目名称: 树村220千伏变电站110千伏送出工程-拆改工程(设计) 招标编号: 91110108777085234D 招标项目编号: Z1101000311004312001 所属行业: 其他 所属地区: 北京市-海淀区 开始时间: 2026-03-06 结束时间: 2026-03-10 招标单位: 北京海融达投资建设有限公司 招标代理: 天行健项目管理咨询(北京)有限公司 发布时间: 2026-03-06 17:17:12.0 公示内容 公告文件.pdf"
- #
- # prompt = f"<|im_start|>user\n{_text}\n{instruction}<|im_end|>\n<|im_start|>assistant\n{answer_prefix}"
- #
- #
- # start_time = time.time()
- # response = client.chat.completions.create(
- # model="bidding-qwen-0.8B-lora-16K",
- # messages=[{"role": "user", "content": prompt}],
- # temperature=0,
- # stop=["<|im_end|>", "<|endoftext|>", "###"],
- # max_tokens=4096,
- # )
- # end_time = time.time()
- #
- # answer = response.choices[0].message.content
- # answer = answer_prefix + answer
- #
- # # 获取指标
- # usage = response.usage
- # prompt_tokens = usage.prompt_tokens
- # completion_tokens = usage.completion_tokens
- # total_duration = end_time - start_time
- # tps = completion_tokens / total_duration if total_duration > 0 else 0
- #
- # print(f"回答: {answer}")
- # print("-" * 30)
- # print(f"指标统计:")
- # print(f" - 输入 Tokens: {prompt_tokens}")
- # print(f" - 输出 Tokens: {completion_tokens}")
- # print(f" - 总耗时: {total_duration:.2f} s")
- # print(f" - 推理速度: {tps:.2f} tokens/s")
# HTTP endpoints of the inference servers used by this module. All of them
# live on the same LAN host and differ only by port and API route.
_OLLAMA_HOST = "http://192.168.2.103"
_GENERATE_ROUTE = "/api/generate"

# Two server instances exposing the raw-completion route.
url = f"{_OLLAMA_HOST}:11434{_GENERATE_ROUTE}"
url2 = f"{_OLLAMA_HOST}:11435{_GENERATE_ROUTE}"
# Third instance (presumably the Docker-hosted one, judging by the name --
# confirm); infer_ollama_api posts to this endpoint.
url_docker = f"{_OLLAMA_HOST}:11436{_GENERATE_ROUTE}"
# Chat route of the same instance; not referenced in the visible part of the file.
url_docker_chat = f"{_OLLAMA_HOST}:11436/api/chat"
def infer_ollama_api(prompt, answer_prefix, model_name=None, temperature=0.05,
                     *, timeout=(10, None)):
    """Run one non-streaming, raw-mode completion and return the full answer.

    The prompt is posted to ``url_docker`` with ``"raw": True``, i.e. it is
    sent to the model as-is, so the caller must supply the complete chat
    template. The caller is expected to end ``prompt`` with ``answer_prefix``
    so that the model continues from it; the prefix is then glued back onto
    the generated continuation here.

    Args:
        prompt: Fully formatted prompt text.
        answer_prefix: Text prepended to the generated continuation.
        model_name: Model tag to query; defaults to
            ``"bidding-qwen-0.8B-lora-16k:latest"`` when ``None``.
        temperature: Sampling temperature forwarded in the request options.
        timeout: Forwarded to ``requests.post``. The default limits only the
            connection phase (10 s) and leaves the read phase unbounded, so a
            long generation is never cut off while an unreachable host fails
            fast instead of blocking until the OS gives up.

    Returns:
        ``answer_prefix`` concatenated with the ``"response"`` field of the
        server reply.

    Raises:
        KeyError: The reply has no ``"response"`` field (for example an error
            payload); the message carries the HTTP status and server detail.
        requests.exceptions.RequestException: Network failure, timeout, or a
            reply body that is not valid JSON.
    """
    # The endpoint is pinned; random selection between endpoints/models is
    # intentionally not done here.
    now_url = url_docker
    if model_name is None:
        model_name = "bidding-qwen-0.8B-lora-16k:latest"
    print('infer_ollama_api', now_url, model_name)

    options = {
        "num_ctx": 16 * 1024,
        "temperature": temperature,
        "num_predict": 4096,
        "repeat_penalty": 1.18,
        # -1 makes the repeat penalty look back over the whole context
        # (see the Ollama parameter reference).
        "repeat_last_n": -1,
        "stop": ["<|im_end|>", "<|endoftext|>"],
        # Fixed seed so repeated calls sample reproducibly.
        "seed": 42,
        # Sample among the 50 most likely tokens. This is NOT greedy decoding;
        # near-determinism comes from the low temperature and the fixed seed.
        "top_k": 50,
        "top_p": 0.99,
    }
    payload = {
        "model": model_name,
        "prompt": prompt,
        "stream": False,
        "raw": True,
        "options": options,
    }

    response = requests.post(now_url, json=payload, timeout=timeout)
    result = response.json()
    try:
        generated = result["response"]
    except KeyError:
        # Keep the exception type callers already see, but explain what the
        # server actually said instead of a bare KeyError('response').
        detail = result.get("error", result)
        raise KeyError(
            f"Ollama reply has no 'response' field "
            f"(HTTP {response.status_code}): {detail!r}"
        ) from None

    # Manually join the caller's prefix with the model's continuation.
    return answer_prefix + generated
|