"""Central configuration constants for the retrieval / bid-extraction pipeline.

Importing this module has one side effect: the data, document, index, cache
and output directories listed below are created if they do not already exist.
"""
import os

# ---------------------------------------------------------------------------
# Base paths
# ---------------------------------------------------------------------------
# BASE_DIR is two directory levels above this file (the parent of the
# directory that contains it).
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_DIR = os.path.join(BASE_DIR, "data")
DOCS_DIR = os.path.join(DATA_DIR, "documents")
INDEX_DIR = os.path.join(DATA_DIR, "indexes")
CACHE_DIR = os.path.join(DATA_DIR, "cache")
OUTPUT_DIR = os.path.join(BASE_DIR, "output")

# Create directories (exist_ok=True keeps repeated imports harmless).
for d in [DATA_DIR, DOCS_DIR, INDEX_DIR, CACHE_DIR, OUTPUT_DIR]:
    os.makedirs(d, exist_ok=True)

# ---------------------------------------------------------------------------
# Embedding settings
# ---------------------------------------------------------------------------
EMBEDDING_MODEL_NAME = "BAAI/bge-large-zh-v1.5"
EMBEDDING_DIMENSION = 1024
EMBEDDING_BATCH_SIZE = 32

# ---------------------------------------------------------------------------
# LLM settings
# ---------------------------------------------------------------------------
LLM_MODEL_NAME = "gpt-4o"
LLM_TEMPERATURE = 0.1
LLM_MAX_TOKENS = 4096

# ---------------------------------------------------------------------------
# Chunk settings
# ---------------------------------------------------------------------------
# NOTE(review): units (characters vs. tokens) are not visible here; confirm
# against the splitter that consumes these values.
CHUNK_SIZE = 512
CHUNK_OVERLAP = 50

# ---------------------------------------------------------------------------
# Vector store settings
# ---------------------------------------------------------------------------
VECTOR_STORE_TYPE = "faiss"
FAISS_INDEX_PATH = os.path.join(INDEX_DIR, "faiss_index")
CHROMA_PERSIST_PATH = os.path.join(INDEX_DIR, "chroma_db")

# ---------------------------------------------------------------------------
# Reranking settings
# ---------------------------------------------------------------------------
RERANK_MODEL_NAME = "BAAI/bge-reranker-large"
RERANK_TOP_K = 5

# ---------------------------------------------------------------------------
# Retrieval settings
# ---------------------------------------------------------------------------
RETRIEVAL_TOP_K = 10
HYBRID_SEARCH_WEIGHT = 0.5

# ---------------------------------------------------------------------------
# HyDE settings
# ---------------------------------------------------------------------------
HYDE_GENERATION_MODEL = "gpt-4o"
HYDE_NUM_HYPOTHESES = 3

# ---------------------------------------------------------------------------
# Self-RAG settings
# ---------------------------------------------------------------------------
SELF_RAG_RELEVANCE_THRESHOLD = 0.7
SELF_RAG_SUPPORT_THRESHOLD = 0.6
SELF_RAG_USEFULNESS_THRESHOLD = 0.7

# ---------------------------------------------------------------------------
# CRAG settings
# ---------------------------------------------------------------------------
CRAG_CORRECTNESS_THRESHOLD = 0.7
CRAG_MAX_WEB_RESULTS = 5

# ---------------------------------------------------------------------------
# RAPTOR settings
# ---------------------------------------------------------------------------
RAPTOR_MAX_CLUSTERS = 50
RAPTOR_SUMMARY_LENGTH = 256

# ---------------------------------------------------------------------------
# Bidding field extraction
# ---------------------------------------------------------------------------
# Field names to extract from a bidding document.
BID_FIELDS = [
    "project_name",
    "project_code",
    "budget_amount",
    "currency",
    "bid_deadline",
    "bid_open_time",
    "bid_location",
    "purchaser_name",
    "purchaser_contact",
    "purchaser_phone",
    "agency_name",
    "agency_contact",
    "agency_phone",
    "qualification_requirements",
    "bid_bond_amount",
    "performance_bond_amount",
    "warranty_period",
    "delivery_time",
    "delivery_location",
    "payment_terms",
    "evaluation_method",
    "scope_of_work",
]

# Bidding domain specific prompts.
# The template has two placeholders, {fields} and {context}, and no other
# braces, so it can be rendered with str.format(fields=..., context=...).
# NOTE(review): the line breaks inside this literal were reconstructed from a
# whitespace-collapsed source; confirm they match the intended layout.
BID_EXTRACTION_PROMPT = """你是一个招投标领域的专家。请根据提供的文档内容,提取以下字段信息:
{fields}

文档内容:
{context}

请以JSON格式返回提取结果。如果某个字段无法从文档中提取,请返回null。
"""

# ---------------------------------------------------------------------------
# Logging settings
# ---------------------------------------------------------------------------
LOG_LEVEL = "INFO"
# Brace-style record template; the placeholders look like loguru's format
# syntax (presumably consumed by loguru -- confirm with the logger setup).
LOG_FORMAT = "{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {name}:{function}:{line} - {message}"