| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189 |
- """
- Speed-focused benchmark script - compares retrieval speed without LLM generation
- Use this to quickly compare the performance of different retrieval methods
- """
- import sys
- import os
- import time
- import numpy as np
- sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
- from rich.console import Console
- from rich.table import Table
- from bdirag.document_processor import Document
- from bdirag.embedding_models import SentenceTransformerEmbedding
- from bdirag.vector_stores import FAISSStore
- from bdirag.rag_methods import (
- NaiveRAG,
- HybridSearchRAG,
- MultiQueryRAG,
- HyDERAG,
- SelfRAG,
- StepBackRAG,
- BidFieldExtractionRAG,
- TableAwareRAG,
- EnsembleRAG,
- GraphRAG,
- BM25RAG,
- TFIDFRAG,
- KeywordRAG,
- )
- from examples.sample_data import SAMPLE_BIDDING_DOCS
class RetrievalBenchmark:
    """Measure and report the latency of ``retrieve`` for several retrieval methods."""

    def __init__(self):
        # Maps method name -> dict of latency statistics, filled in by run().
        self.results = {}

    def run(self, methods, queries, iterations=3):
        """Time ``method.retrieve(query, k=5)`` for every method/query pair.

        Each query is executed ``iterations`` times per method and the mean of
        those runs is recorded as that query's latency. Summary statistics over
        the per-query latencies are stored in ``self.results[method_name]``
        under the keys ``latencies``, ``avg``, ``std``, ``min``, ``max``,
        ``p50`` and ``p95``. The comparison table is printed at the end.

        Args:
            methods: Mapping of display name to an object exposing
                ``retrieve(query, k=...)``.
            queries: Iterable of query strings.
            iterations: Number of timed repetitions per query (must be >= 1).

        Raises:
            ValueError: If ``queries`` is empty or ``iterations`` is below 1.
        """
        # Materialize once so a one-shot iterable is not exhausted after the
        # first method, and so emptiness can be checked up front.
        queries = list(queries)
        if not queries:
            raise ValueError("queries must contain at least one query")
        if iterations < 1:
            raise ValueError("iterations must be at least 1")

        for method_name, method in methods.items():
            print(f"\nBenchmarking {method_name}...")
            latencies = []
            for query in queries:
                query_latencies = []
                for _ in range(iterations):
                    # perf_counter is monotonic and high-resolution, unlike
                    # the wall clock returned by time.time().
                    start = time.perf_counter()
                    method.retrieve(query, k=5)
                    query_latencies.append(time.perf_counter() - start)
                avg_latency = np.mean(query_latencies)
                latencies.append(avg_latency)
                print(f" Query: {query[:50]}... -> {avg_latency:.3f}s")
            self.results[method_name] = {
                "latencies": latencies,
                "avg": np.mean(latencies),
                "std": np.std(latencies),
                "min": np.min(latencies),
                "max": np.max(latencies),
                "p50": np.percentile(latencies, 50),
                "p95": np.percentile(latencies, 95),
            }
        self.print_results()

    def print_results(self):
        """Print a table of the collected statistics, fastest average first."""
        console = Console()
        table = Table(title="Retrieval Speed Comparison")
        table.add_column("Method", style="cyan")
        table.add_column("Avg (s)", justify="right", style="green")
        table.add_column("Std (s)", justify="right", style="green")
        table.add_column("Min (s)", justify="right", style="yellow")
        table.add_column("Max (s)", justify="right", style="yellow")
        table.add_column("P50 (s)", justify="right", style="magenta")
        table.add_column("P95 (s)", justify="right", style="magenta")
        table.add_column("QPS", justify="right", style="blue")
        for name in sorted(self.results, key=lambda x: self.results[x]["avg"]):
            r = self.results[name]
            avg = r["avg"]
            # Guard against a zero average (timer resolution) so the
            # queries-per-second column never raises ZeroDivisionError.
            qps = 1 / avg if avg > 0 else float("inf")
            table.add_row(
                name,
                f"{avg:.4f}",
                f"{r['std']:.4f}",
                f"{r['min']:.4f}",
                f"{r['max']:.4f}",
                f"{r['p50']:.4f}",
                f"{r['p95']:.4f}",
                f"{qps:.1f}",
            )
        console.print(table)
def main():
    """Run the retrieval speed benchmark on the sample bidding documents.

    Loads the embedding model, creates a single ``FAISSStore``, wraps each
    entry of ``SAMPLE_BIDDING_DOCS`` in a ``Document``, indexes the documents
    through every configured retrieval method, and finally times each method
    over a fixed list of queries with ``RetrievalBenchmark``.
    """
    print("=" * 60)
    print("BidiRAG - Retrieval Speed Benchmark")
    print("=" * 60)

    # Load embedding model
    print("\nLoading embedding model...")
    embedding_model = SentenceTransformerEmbedding(
        model_name="BAAI/bge-large-zh-v1.5",
        device="cpu",
    )

    # Create vector store
    print("Creating vector store...")
    vector_store = FAISSStore(embedding_model=embedding_model)

    # Prepare documents
    documents = [
        Document(
            page_content=doc["content"],
            metadata={"title": doc["title"], "source": doc["title"]},
        )
        for doc in SAMPLE_BIDDING_DOCS
    ]

    # Initialize methods (no LLM needed for pure retrieval)
    print("Initializing retrieval methods...")
    shared = {"embedding_model": embedding_model, "vector_store": vector_store}
    methods = {
        "BM25RAG": BM25RAG(**shared),
        "TFIDFRAG": TFIDFRAG(**shared),
        "KeywordRAG_BM25": KeywordRAG(search_method="bm25", **shared),
        "KeywordRAG_TFIDF": KeywordRAG(search_method="tfidf", **shared),
        "NaiveRAG": NaiveRAG(**shared),
        "HybridSearchRAG": HybridSearchRAG(**shared),
        "BidFieldExtractionRAG": BidFieldExtractionRAG(**shared),
        "TableAwareRAG": TableAwareRAG(**shared),
        "EnsembleRAG": EnsembleRAG(**shared),
    }

    # Index documents
    # NOTE(review): every method shares the same vector_store instance, so
    # index_documents may insert the same documents once per method — confirm
    # that this is intended.
    print("Indexing documents...")
    for method in methods.values():
        method.index_documents(documents)

    # Test queries
    test_queries = [
        "What is the project budget?",
        "What are the qualification requirements?",
        "When is the bid deadline?",
        "What is the warranty period?",
        "What are the payment terms?",
        "What is the evaluation method?",
        "What equipment is needed?",
        "What is the delivery time?",
    ]

    # Run benchmark; a single variable keeps the printed iteration count in
    # sync with the value actually passed to the benchmark.
    iterations = 3
    print(
        f"\nRunning benchmark with {len(test_queries)} queries "
        f"({iterations} iterations each)..."
    )
    benchmark = RetrievalBenchmark()
    benchmark.run(methods, test_queries, iterations=iterations)
    print("\nBenchmark complete!")
# Script entry point: run the benchmark only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|