"""
Bid field extraction demo - demonstrates structured information extraction
from bidding announcements using RAG
"""

import sys
import os
import json

# Make the repository root importable so the `bdirag` and `examples` packages
# resolve when this file is run directly as a script. This must happen before
# the project imports below.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from openai import OpenAI

from bdirag.document_processor import Document
from bdirag.embedding_models import SentenceTransformerEmbedding
from bdirag.vector_stores import FAISSStore
from bdirag.rag_methods import BidFieldExtractionRAG
from examples.sample_data import SAMPLE_BIDDING_DOCS


def main():
    """Index the sample bidding documents and run field extraction on each.

    The demo runs in three stages:

    1. Load the sentence-transformer embedding model on CPU.
    2. Wrap every entry of ``SAMPLE_BIDDING_DOCS`` in a ``Document`` and index
       them through ``BidFieldExtractionRAG``.
    3. Issue one extraction query per sample document and print the answer,
       the total latency and the number of retrieved chunks.

    Configuration is read from the environment variables ``OPENAI_API_KEY``,
    ``OPENAI_BASE_URL`` and ``LLM_MODEL``, each with a fallback default.

    Failures while loading the model or indexing propagate to the caller.
    A failure while querying a single document is reported on stdout and the
    loop continues with the next document.
    """
    separator = "=" * 60

    print(separator)
    print("BidiRAG - Bid Field Extraction Demo")
    print(separator)

    # Configuration
    LLM_API_KEY = os.getenv("OPENAI_API_KEY", "your-api-key-here")
    LLM_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
    LLM_MODEL = os.getenv("LLM_MODEL", "gpt-4o")

    # Load embedding model
    print("\n[1/3] Loading embedding model...")
    embedding_model = SentenceTransformerEmbedding(
        model_name="BAAI/bge-large-zh-v1.5",
        device="cpu",
    )

    # Create vector store and index
    print("\n[2/3] Indexing bidding documents...")
    vector_store = FAISSStore(embedding_model=embedding_model)

    # The document title doubles as the "source" metadata field.
    documents = [
        Document(
            page_content=doc["content"],
            metadata={"title": doc["title"], "source": doc["title"]},
        )
        for doc in SAMPLE_BIDDING_DOCS
    ]

    llm_client = OpenAI(api_key=LLM_API_KEY, base_url=LLM_BASE_URL)
    rag = BidFieldExtractionRAG(
        embedding_model=embedding_model,
        vector_store=vector_store,
        llm_client=llm_client,
        llm_model=LLM_MODEL,
    )

    rag.index_documents(documents)
    print(f" Indexed {len(documents)} documents")

    # Extract fields for each bidding document
    print("\n[3/3] Extracting fields from bidding documents...")

    for doc in SAMPLE_BIDDING_DOCS:
        title = doc["title"]

        print(f"\n{separator}")
        print(f"Document: {title}")
        print(separator)

        # The title must actually be interpolated into the query; otherwise
        # every document would be queried with the same literal template text.
        query = f"Extract all information from {title}"

        # Deliberately broad catch: this is a demo loop and one failing
        # document should not prevent the remaining ones from being processed.
        try:
            result = rag.query(query, k=10)
            print("\nExtracted JSON:")
            print(result.answer)
            print(f"\nLatency: {result.latency_total:.3f}s")
            print(f"Retrieved {len(result.retrieved_docs)} document chunks")
        except Exception as e:
            print(f"ERROR: {e}")

    print("\n\nExtraction complete!")


if __name__ == "__main__":
    main()