# -*- coding: utf-8 -*-
"""调试 BM25 HTML Tree 检索"""
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from bdirag.rag_methods.bm25_html_tree_rag import BM25HTMLTreeRAG
# Sample document handed to ``build_index`` by this debug script.
# NOTE(review): the text below contains no HTML tags (it reads like a
# pipe-delimited table) -- confirm this is the input build_index expects.
SAMPLE_HTML = """
政府采购中标公告
| 采购人 | XX市财政局 |
| 中标人 | XX科技有限公司 |
| 中标金额 | 50万元 |
"""

# Queries exercised by the script, in the order they are run.
SAMPLE_QUERIES = ("采购人", "中标人")


def run_query(rag, query, k=3, preview_chars=200):
    """Run one query against *rag* and print the scored hits.

    Args:
        rag: Object exposing ``query(text, k=...)`` whose result supports
            ``len()`` and iterates as ``(doc, score)`` pairs, where ``doc``
            has a ``page_content`` string attribute and ``score`` is a number.
        query: Query text to look up.
        k: Number of results requested from ``rag.query``.
        preview_chars: Maximum number of characters of each document's
            ``page_content`` to print.

    Returns:
        The results object exactly as returned by ``rag.query``.
    """
    print(f"\n测试查询: '{query}'")
    results = rag.query(query, k=k)
    print(f"返回 {len(results)} 个结果")
    for rank, (doc, score) in enumerate(results, 1):
        print(f"\n结果 {rank} (score: {score:.3f}):")
        print(doc.page_content[:preview_chars])
    return results


def main():
    """Build an index over ``SAMPLE_HTML`` and print results for each sample query."""
    print("创建 BM25HTMLTreeRAG 实例...")
    rag = BM25HTMLTreeRAG()

    print("\n构建索引...")
    rag.build_index(SAMPLE_HTML)
    print(f"\n索引了 {len(rag.all_nodes)} 个节点")

    for position, query in enumerate(SAMPLE_QUERIES):
        # A separator rule is printed between consecutive queries, not before
        # the first one, matching the script's original output.
        if position:
            print("\n" + "=" * 80)
        run_query(rag, query)


# Guarded so that importing this file does not build an index or run queries.
if __name__ == "__main__":
    main()