# -*- coding: utf-8 -*-
"""Debug script for BM25 HTML tree retrieval.

Builds a ``BM25HTMLTreeRAG`` index over a small sample HTML document and
prints the top hits for two sample queries.
"""
import os
import sys

# Sample HTML used as the indexing input: one heading plus a three-row table.
SAMPLE_HTML = """
<html>
<body>
<h1>政府采购中标公告</h1>
<table>
<tr><td>采购人</td><td>XX市财政局</td></tr>
<tr><td>中标人</td><td>XX科技有限公司</td></tr>
<tr><td>中标金额</td><td>50万元</td></tr>
</table>
</body>
</html>
"""

# Number of leading characters of each hit's content shown in the output.
PREVIEW_CHARS = 200


def format_result(rank, doc, score):
    """Return the printable summary of a single retrieval hit.

    Args:
        rank: 1-based position of the hit in the result list.
        doc: Object exposing a ``page_content`` string attribute.
        score: Numeric score of the hit; rendered with three decimals.

    Returns:
        A header line (preceded by a blank line) followed by the first
        ``PREVIEW_CHARS`` characters of ``doc.page_content``.
    """
    preview = doc.page_content[:PREVIEW_CHARS]
    return f"\n结果 {rank} (score: {score:.3f}):\n{preview}"


def run_query(rag, query, k=3):
    """Run ``query`` against ``rag`` and print every returned hit.

    Args:
        rag: Object exposing ``query(text, k=...)`` that returns a sized
            iterable of ``(doc, score)`` pairs.
        query: Query text to search for.
        k: Value forwarded to ``rag.query`` as ``k``.

    Returns:
        The results object returned by ``rag.query``, unchanged.
    """
    print(f"\n测试查询: '{query}'")
    results = rag.query(query, k=k)
    print(f"返回 {len(results)} 个结果")
    for rank, (doc, score) in enumerate(results, 1):
        print(format_result(rank, doc, score))
    return results


def main():
    """Build the index over ``SAMPLE_HTML`` and run the two sample queries."""
    # Put the parent of this script's directory on sys.path so the ``bdirag``
    # package can be imported; this must happen before the import below.
    sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    from bdirag.rag_methods.bm25_html_tree_rag import BM25HTMLTreeRAG

    print("创建 BM25HTMLTreeRAG 实例...")
    rag = BM25HTMLTreeRAG()

    print("\n构建索引...")
    rag.build_index(SAMPLE_HTML)
    print(f"\n索引了 {len(rag.all_nodes)} 个节点")

    run_query(rag, "采购人")
    # Visual separator between the two query reports.
    print("\n" + "=" * 80)
    run_query(rag, "中标人")


if __name__ == "__main__":
    main()