infer_ollama.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. import base64
  2. import random
  3. import re
  4. import traceback
  5. import pandas as pd
  6. import requests
  7. from bs4 import BeautifulSoup
  8. from openai import OpenAI
  9. import time
  10. # client = OpenAI(base_url="http://192.168.2.103:11434/v1", api_key="ollama")
  11. #
  12. # instruction = '提取以上招投标文档的关键信息,只输出有值的,其中中标人等放在多标段信息中形成数组,' \
  13. # '产品参数等放在产品信息中形成数组' \
  14. # '金额均以元为单位,时间格式为YYYY-MM-DD HH:MM:SS,无多余内容' \
  15. # '直接得到要素提取Json:'
  16. #
  17. # answer_prefix = '{"项目名称":'
  18. #
  19. # _text = "树村220千伏变电站110千伏送出工程-拆改工程(设计)中标候选人公示 招标项目名称: 树村220千伏变电站110千伏送出工程-拆改工程(设计) 招标编号: 91110108777085234D 招标项目编号: Z1101000311004312001 所属行业: 其他 所属地区: 北京市-海淀区 开始时间: 2026-03-06 结束时间: 2026-03-10 招标单位: 北京海融达投资建设有限公司 招标代理: 天行健项目管理咨询(北京)有限公司 发布时间: 2026-03-06 17:17:12.0 公示内容 公告文件.pdf"
  20. #
  21. # prompt = f"<|im_start|>user\n{_text}\n{instruction}<|im_end|>\n<|im_start|>assistant\n{answer_prefix}"
  22. #
  23. #
  24. # start_time = time.time()
  25. # response = client.chat.completions.create(
  26. # model="bidding-qwen-0.8B-lora-16K",
  27. # messages=[{"role": "user", "content": prompt}],
  28. # temperature=0,
  29. # stop=["<|im_end|>", "<|endoftext|>", "###"],
  30. # max_tokens=4096,
  31. # )
  32. # end_time = time.time()
  33. #
  34. # answer = response.choices[0].message.content
  35. # answer = answer_prefix + answer
  36. #
  37. # # 获取指标
  38. # usage = response.usage
  39. # prompt_tokens = usage.prompt_tokens
  40. # completion_tokens = usage.completion_tokens
  41. # total_duration = end_time - start_time
  42. # tps = completion_tokens / total_duration if total_duration > 0 else 0
  43. #
  44. # print(f"回答: {answer}")
  45. # print("-" * 30)
  46. # print(f"指标统计:")
  47. # print(f" - 输入 Tokens: {prompt_tokens}")
  48. # print(f" - 输出 Tokens: {completion_tokens}")
  49. # print(f" - 总耗时: {total_duration:.2f} s")
  50. # print(f" - 推理速度: {tps:.2f} tokens/s")
  51. url = "http://192.168.2.103:11434/api/generate"
  52. url2 = "http://192.168.2.103:11435/api/generate"
  53. url_docker = "http://192.168.2.103:11436/api/generate"
  54. url_docker_chat = "http://192.168.2.103:11436/api/chat"
  55. def infer_ollama_api(prompt, answer_prefix, model_name=None, temperature=0.05):
  56. now_url = url_docker
  57. if model_name is None:
  58. model_name = "bidding-qwen-0.8B-lora:latest"
  59. # model_name = 'deepseek-r1:1.5b'
  60. print('infer_ollama_api', now_url, model_name)
  61. payload = {
  62. "model": model_name,
  63. # "model": "bidding-qwen-2B-lora-16k:latest",
  64. # "model": "qwen3.5:0.8b",
  65. "prompt": prompt,
  66. "stream": False,
  67. "raw": True,
  68. "options": {
  69. "num_ctx": int(1024*16),
  70. "temperature": temperature,
  71. "num_predict": 4096,
  72. "repeat_penalty": 1.18,
  73. "repeat_last_n": -1,
  74. "stop": ["<|im_end|>", "<|endoftext|>"],
  75. "seed": 42, # 固定随机种子
  76. "top_k": 50, # 只选概率最高的一个 Token(贪婪搜索)
  77. "top_p": 0.99,
  78. }
  79. }
  80. response = requests.post(now_url, json=payload)
  81. result = response.json()
  82. # 核心:手动把你的前缀和模型生成的后续内容拼起来
  83. final_content = answer_prefix + result["response"]
  84. # print(final_content)
  85. return final_content