# Source repository: luojiehua/BidiRag
"""
Speed-focused benchmark script - compares retrieval speed without LLM generation
Use this to quickly compare the performance of different retrieval methods
"""

import sys
import os
import time
import numpy as np
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from rich.console import Console
from rich.table import Table

from bdirag.document_processor import Document
from bdirag.embedding_models import SentenceTransformerEmbedding
from bdirag.vector_stores import FAISSStore
from bdirag.rag_methods import (
    NaiveRAG,
    HybridSearchRAG,
    MultiQueryRAG,
    HyDERAG,
    SelfRAG,
    StepBackRAG,
    BidFieldExtractionRAG,
    TableAwareRAG,
    EnsembleRAG,
    GraphRAG,
    BM25RAG,
    TFIDFRAG,
    KeywordRAG,
)
from examples.sample_data import SAMPLE_BIDDING_DOCS


class RetrievalBenchmark:
    """Time the ``retrieve`` call of several retrieval methods and tabulate it.

    After :meth:`run`, ``results`` maps each method name to a dict with the
    keys ``latencies`` (per-query mean latency), ``avg``, ``std``, ``min``,
    ``max``, ``p50`` and ``p95``, all in seconds.
    """

    def __init__(self):
        # method name -> latency statistics dict (see class docstring)
        self.results = {}

    def run(self, methods, queries, iterations=3):
        """Benchmark every method on every query, then print a summary table.

        Args:
            methods: Mapping of display name to an object exposing
                ``retrieve(query, k=5)``.
            queries: Iterable of queries. Each query is sliced with ``[:50]``
                for the progress line, so strings are expected.
            iterations: Number of timed ``retrieve`` calls per query; the
                per-query latency is the mean over these calls.

        Raises:
            ValueError: If ``queries`` is empty or ``iterations`` is below 1
                (either would produce empty or NaN statistics).
        """
        # Materialize once so a one-shot iterator is not exhausted after the
        # first method.
        queries = list(queries)
        if not queries:
            raise ValueError("queries must not be empty")
        if iterations < 1:
            raise ValueError("iterations must be at least 1")

        for method_name, method in methods.items():
            print("\nBenchmarking {0}...".format(method_name))
            latencies = []

            for query in queries:
                query_latencies = []
                for _ in range(iterations):
                    # perf_counter is monotonic and high resolution, unlike
                    # the wall clock returned by time.time().
                    start = time.perf_counter()
                    method.retrieve(query, k=5)
                    query_latencies.append(time.perf_counter() - start)

                avg_latency = np.mean(query_latencies)
                latencies.append(avg_latency)
                print("  Query: {0}... -> {1:.3f}s".format(query[:50], avg_latency))

            self.results[method_name] = {
                "latencies": latencies,
                "avg": np.mean(latencies),
                "std": np.std(latencies),
                "min": np.min(latencies),
                "max": np.max(latencies),
                "p50": np.percentile(latencies, 50),
                "p95": np.percentile(latencies, 95),
            }

        self.print_results()

    def print_results(self):
        """Print ``self.results`` as a rich table, lowest average latency first."""
        console = Console()
        table = Table(title="Retrieval Speed Comparison")

        table.add_column("Method", style="cyan")
        table.add_column("Avg (s)", justify="right", style="green")
        table.add_column("Std (s)", justify="right", style="green")
        table.add_column("Min (s)", justify="right", style="yellow")
        table.add_column("Max (s)", justify="right", style="yellow")
        table.add_column("P50 (s)", justify="right", style="magenta")
        table.add_column("P95 (s)", justify="right", style="magenta")
        table.add_column("QPS", justify="right", style="blue")

        for name in sorted(self.results, key=lambda x: self.results[x]["avg"]):
            table.add_row(*self._format_row(name, self.results[name]))

        console.print(table)

    @staticmethod
    def _format_row(name, stats):
        """Return the table cells (all strings) for one method's statistics."""
        avg = stats["avg"]
        # A zero average (timer resolution too coarse) means unbounded
        # throughput; report "inf" instead of dividing by zero.
        qps = 1.0 / avg if avg else float("inf")
        return (
            name,
            "{0:.4f}".format(avg),
            "{0:.4f}".format(stats["std"]),
            "{0:.4f}".format(stats["min"]),
            "{0:.4f}".format(stats["max"]),
            "{0:.4f}".format(stats["p50"]),
            "{0:.4f}".format(stats["p95"]),
            "{0:.1f}".format(qps),
        )


def main():
    """Build the retrieval methods, index the sample documents and time them.

    Loads the embedding model on CPU, wraps ``SAMPLE_BIDDING_DOCS`` as
    ``Document`` objects, indexes them with each method, and runs
    ``RetrievalBenchmark`` over a fixed list of English test queries.
    """
    print("=" * 60)
    print("BidiRAG - Retrieval Speed Benchmark")
    print("=" * 60)

    # Load embedding model
    print("\nLoading embedding model...")
    embedding_model = SentenceTransformerEmbedding(
        model_name="BAAI/bge-large-zh-v1.5",
        device="cpu"
    )

    # Create vector store
    print("Creating vector store...")
    vector_store = FAISSStore(embedding_model=embedding_model)

    # Prepare documents; the title doubles as the "source" metadata field.
    documents = [
        Document(page_content=doc["content"], metadata={"title": doc["title"], "source": doc["title"]})
        for doc in SAMPLE_BIDDING_DOCS
    ]

    # Initialize methods (no LLM needed for pure retrieval)
    print("Initializing retrieval methods...")
    methods = {
        "BM25RAG": BM25RAG(
            embedding_model=embedding_model,
            vector_store=vector_store,
        ),
        "TFIDFRAG": TFIDFRAG(
            embedding_model=embedding_model,
            vector_store=vector_store,
        ),
        "KeywordRAG_BM25": KeywordRAG(
            embedding_model=embedding_model,
            vector_store=vector_store,
            search_method="bm25",
        ),
        "KeywordRAG_TFIDF": KeywordRAG(
            embedding_model=embedding_model,
            vector_store=vector_store,
            search_method="tfidf",
        ),
        "NaiveRAG": NaiveRAG(
            embedding_model=embedding_model,
            vector_store=vector_store,
        ),
        "HybridSearchRAG": HybridSearchRAG(
            embedding_model=embedding_model,
            vector_store=vector_store,
        ),
        "BidFieldExtractionRAG": BidFieldExtractionRAG(
            embedding_model=embedding_model,
            vector_store=vector_store,
        ),
        "TableAwareRAG": TableAwareRAG(
            embedding_model=embedding_model,
            vector_store=vector_store,
        ),
        "EnsembleRAG": EnsembleRAG(
            embedding_model=embedding_model,
            vector_store=vector_store,
        ),
    }

    # Index documents
    # NOTE(review): every method shares the same vector_store instance and
    # index_documents runs once per method. If index_documents appends to the
    # shared store, the same documents are indexed repeatedly - confirm.
    print("Indexing documents...")
    for method in methods.values():
        method.index_documents(documents)

    # Test queries
    test_queries = [
        "What is the project budget?",
        "What are the qualification requirements?",
        "When is the bid deadline?",
        "What is the warranty period?",
        "What are the payment terms?",
        "What is the evaluation method?",
        "What equipment is needed?",
        "What is the delivery time?",
    ]

    # Run benchmark; one variable keeps the printed count and the actual
    # iteration count in sync.
    iterations = 3
    print(
        "\nRunning benchmark with {0} queries ({1} iterations each)...".format(
            len(test_queries), iterations
        )
    )
    benchmark = RetrievalBenchmark()
    benchmark.run(methods, test_queries, iterations=iterations)

    print("\nBenchmark complete!")


# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()