"""Project configuration constants.

Defines the on-disk directory layout, model names, retrieval parameters,
and the bid-field extraction prompt. Importing this module creates the
data and output directories if they do not already exist.
"""

import os

# Base paths. BASE_DIR is the parent of the directory that contains this file.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_DIR = os.path.join(BASE_DIR, "data")
DOCS_DIR = os.path.join(DATA_DIR, "documents")
INDEX_DIR = os.path.join(DATA_DIR, "indexes")
CACHE_DIR = os.path.join(DATA_DIR, "cache")
OUTPUT_DIR = os.path.join(BASE_DIR, "output")

# Create directories at import time; exist_ok=True keeps repeated imports
# from failing when the directories are already present.
for d in [DATA_DIR, DOCS_DIR, INDEX_DIR, CACHE_DIR, OUTPUT_DIR]:
    os.makedirs(d, exist_ok=True)
# Embedding settings
EMBEDDING_MODEL_NAME = "BAAI/bge-large-zh-v1.5"
# Presumably the vector size produced by EMBEDDING_MODEL_NAME; keep the two
# in sync if the model is changed — TODO confirm against the index builder.
EMBEDDING_DIMENSION = 1024
EMBEDDING_BATCH_SIZE = 32

# LLM settings
LLM_MODEL_NAME = "gpt-4o"
LLM_TEMPERATURE = 0.1
LLM_MAX_TOKENS = 4096

# Chunk settings
# NOTE(review): units (characters vs. tokens) depend on the splitter that
# consumes these values — confirm at the call site.
CHUNK_SIZE = 512
CHUNK_OVERLAP = 50
# Vector store settings
VECTOR_STORE_TYPE = "faiss"
# Both store locations live under INDEX_DIR.
FAISS_INDEX_PATH = os.path.join(INDEX_DIR, "faiss_index")
CHROMA_PERSIST_PATH = os.path.join(INDEX_DIR, "chroma_db")
# Reranking settings
RERANK_MODEL_NAME = "BAAI/bge-reranker-large"
RERANK_TOP_K = 5

# Retrieval settings
RETRIEVAL_TOP_K = 10
# NOTE(review): which side (dense vs. sparse) this weight applies to is
# decided by the retriever that reads it — confirm there.
HYBRID_SEARCH_WEIGHT = 0.5
# HyDE settings
HYDE_GENERATION_MODEL = "gpt-4o"
HYDE_NUM_HYPOTHESES = 3

# Self-RAG settings
SELF_RAG_RELEVANCE_THRESHOLD = 0.7
SELF_RAG_SUPPORT_THRESHOLD = 0.6
SELF_RAG_USEFULNESS_THRESHOLD = 0.7

# CRAG settings
CRAG_CORRECTNESS_THRESHOLD = 0.7
CRAG_MAX_WEB_RESULTS = 5

# RAPTOR settings
RAPTOR_MAX_CLUSTERS = 50
RAPTOR_SUMMARY_LENGTH = 256
# Bidding field extraction: names of the fields to pull out of a bid document.
BID_FIELDS = [
    "project_name", "project_code", "budget_amount", "currency",
    "bid_deadline", "bid_open_time", "bid_location",
    "purchaser_name", "purchaser_contact", "purchaser_phone",
    "agency_name", "agency_contact", "agency_phone",
    "qualification_requirements", "bid_bond_amount",
    "performance_bond_amount", "warranty_period",
    "delivery_time", "delivery_location", "payment_terms",
    "evaluation_method", "scope_of_work",
]
# Bidding domain specific prompts
# Chinese-language template with two str.format placeholders: {fields} and
# {context}. It asks for a JSON result, with null for any field that cannot
# be extracted from the document.
# NOTE(review): blank lines between the template's paragraphs may have been
# lost in an earlier copy of this file — confirm against the intended prompt.
BID_EXTRACTION_PROMPT = """你是一个招投标领域的专家。请根据提供的文档内容,提取以下字段信息:
{fields}
文档内容:
{context}
请以JSON格式返回提取结果。如果某个字段无法从文档中提取,请返回null。
"""
# Logging settings
LOG_LEVEL = "INFO"
# Record layout: timestamp | padded level | name:function:line - message,
# wrapped in colour markup tags. The syntax looks like loguru's format
# strings — confirm against the logger setup that consumes it.
LOG_FORMAT = (
    "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | "
    "<level>{level: <8}</level> | "
    "<cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - "
    "<level>{message}</level>"
)