| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115 |
- """
- Simple demo script to test basic RAG methods without full benchmark
- Good for quick validation and understanding
- """
- import sys
- import os
- sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
- from openai import OpenAI
- from bdirag.document_processor import Document
- from bdirag.embedding_models import SentenceTransformerEmbedding
- from bdirag.vector_stores import FAISSStore
- from bdirag.rag_methods import (
- NaiveRAG,
- BidFieldExtractionRAG,
- HyDERAG,
- )
- from examples.sample_data import SAMPLE_BIDDING_DOCS
def main():
    """Run a quick end-to-end demo of three RAG methods on the sample docs.

    The demo proceeds in four printed steps:

    1. Load the sentence-transformer embedding model on CPU.
    2. Create a FAISS-backed vector store and wrap each entry of
       ``SAMPLE_BIDDING_DOCS`` in a ``Document``.
    3. Build ``NaiveRAG``, ``BidFieldExtractionRAG`` and ``HyDERAG`` around
       the same embedding model, vector store and LLM client, then index
       the documents through each of them.
    4. Run every test query through every method and print the answer,
       the latency breakdown and the number of retrieved documents.

    Configuration is read from the environment variables
    ``OPENAI_API_KEY``, ``OPENAI_BASE_URL`` and ``LLM_MODEL``; each falls
    back to the default given below when unset.

    Errors raised while answering a single query are printed and do not
    abort the remaining queries/methods.
    """
    separator = "=" * 60

    print(separator)
    print("BidiRAG - Quick Demo")
    print(separator)

    # Configuration - modify these as needed.
    # The API key default is only a placeholder; with it, LLM calls are
    # expected to fail and be reported by the per-query error handler.
    LLM_API_KEY = os.getenv("OPENAI_API_KEY", "your-api-key-here")
    LLM_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
    LLM_MODEL = os.getenv("LLM_MODEL", "gpt-4o")

    # Step 1: Load embedding model
    print("\n[1/4] Loading embedding model...")
    embedding_model = SentenceTransformerEmbedding(
        model_name="BAAI/bge-large-zh-v1.5",
        device="cpu",
    )
    print(" Model loaded, dimension: {0}".format(embedding_model.dimension))

    # Step 2: Create vector store and prepare documents
    print("\n[2/4] Creating vector store and indexing documents...")
    vector_store = FAISSStore(embedding_model=embedding_model)
    documents = [
        Document(
            page_content=doc["content"],
            # The title doubles as the source identifier.
            metadata={"title": doc["title"], "source": doc["title"]},
        )
        for doc in SAMPLE_BIDDING_DOCS
    ]
    print(" Prepared {0} documents".format(len(documents)))

    # Step 3: Initialize RAG methods
    print("\n[3/4] Initializing RAG methods...")
    llm_client = OpenAI(api_key=LLM_API_KEY, base_url=LLM_BASE_URL)
    naive_rag = NaiveRAG(
        embedding_model=embedding_model,
        vector_store=vector_store,
        llm_client=llm_client,
        llm_model=LLM_MODEL,
    )
    bid_rag = BidFieldExtractionRAG(
        embedding_model=embedding_model,
        vector_store=vector_store,
        llm_client=llm_client,
        llm_model=LLM_MODEL,
    )
    hyde_rag = HyDERAG(
        embedding_model=embedding_model,
        vector_store=vector_store,
        llm_client=llm_client,
        llm_model=LLM_MODEL,
    )

    # NOTE(review): all three methods share one vector_store and each one
    # indexes the same documents. If index_documents() appends to the
    # store, every document ends up stored three times - confirm whether
    # that is intended or whether a single indexing pass suffices.
    naive_rag.index_documents(documents)
    bid_rag.index_documents(documents)
    hyde_rag.index_documents(documents)
    print(" Indexing complete")

    # Step 4: Test queries
    print("\n[4/4] Running test queries...")
    queries = [
        "What is the budget for the smart transportation project?",
        "List the qualification requirements for all projects",
        "What are the payment terms for the road construction project?",
    ]
    methods = [
        ("NaiveRAG", naive_rag),
        ("BidFieldExtractionRAG", bid_rag),
        ("HyDERAG", hyde_rag),
    ]

    for query in queries:
        print("\n{0}".format(separator))
        print("Query: {0}".format(query))
        print(separator)

        for method_name, method in methods:
            print("\n--- {0} ---".format(method_name))
            try:
                result = method.query(query, k=5)
                print("Answer: {0}".format(result.answer))
                # Precision specs belong inside the replacement fields.
                print(
                    "Latency: {0:.3f}s "
                    "(retrieval: {1:.3f}s, generation: {2:.3f}s)".format(
                        result.latency_total,
                        result.latency_retrieval,
                        result.latency_generation,
                    )
                )
                print(
                    "Retrieved {0} documents".format(
                        len(result.retrieved_docs)
                    )
                )
            except Exception as e:
                # Demo boundary: report the failure and keep going so one
                # failing method/query does not hide the other results.
                print("ERROR: {0}".format(e))

    print("\n\nDemo complete!")
# Script entry point: run the demo only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|