# config.py
  1. import os
  2. # Base paths
  3. BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
  4. DATA_DIR = os.path.join(BASE_DIR, "data")
  5. DOCS_DIR = os.path.join(DATA_DIR, "documents")
  6. INDEX_DIR = os.path.join(DATA_DIR, "indexes")
  7. CACHE_DIR = os.path.join(DATA_DIR, "cache")
  8. OUTPUT_DIR = os.path.join(BASE_DIR, "output")
  9. # Create directories
  10. for d in [DATA_DIR, DOCS_DIR, INDEX_DIR, CACHE_DIR, OUTPUT_DIR]:
  11. os.makedirs(d, exist_ok=True)
  12. # Embedding settings
  13. EMBEDDING_MODEL_NAME = "BAAI/bge-large-zh-v1.5"
  14. EMBEDDING_DIMENSION = 1024
  15. EMBEDDING_BATCH_SIZE = 32
  16. # LLM settings
  17. LLM_MODEL_NAME = "gpt-4o"
  18. LLM_TEMPERATURE = 0.1
  19. LLM_MAX_TOKENS = 4096
  20. # Chunk settings
  21. CHUNK_SIZE = 512
  22. CHUNK_OVERLAP = 50
  23. # Vector store settings
  24. VECTOR_STORE_TYPE = "faiss"
  25. FAISS_INDEX_PATH = os.path.join(INDEX_DIR, "faiss_index")
  26. CHROMA_PERSIST_PATH = os.path.join(INDEX_DIR, "chroma_db")
  27. # Reranking settings
  28. RERANK_MODEL_NAME = "BAAI/bge-reranker-large"
  29. RERANK_TOP_K = 5
  30. # Retrieval settings
  31. RETRIEVAL_TOP_K = 10
  32. HYBRID_SEARCH_WEIGHT = 0.5
  33. # HyDE settings
  34. HYDE_GENERATION_MODEL = "gpt-4o"
  35. HYDE_NUM_HYPOTHESES = 3
  36. # Self-RAG settings
  37. SELF_RAG_RELEVANCE_THRESHOLD = 0.7
  38. SELF_RAG_SUPPORT_THRESHOLD = 0.6
  39. SELF_RAG_USEFULNESS_THRESHOLD = 0.7
  40. # CRAG settings
  41. CRAG_CORRECTNESS_THRESHOLD = 0.7
  42. CRAG_MAX_WEB_RESULTS = 5
  43. # RAPTOR settings
  44. RAPTOR_MAX_CLUSTERS = 50
  45. RAPTOR_SUMMARY_LENGTH = 256
  46. # Bidding field extraction
  47. BID_FIELDS = [
  48. "project_name", "project_code", "budget_amount", "currency",
  49. "bid_deadline", "bid_open_time", "bid_location",
  50. "purchaser_name", "purchaser_contact", "purchaser_phone",
  51. "agency_name", "agency_contact", "agency_phone",
  52. "qualification_requirements", "bid_bond_amount",
  53. "performance_bond_amount", "warranty_period",
  54. "delivery_time", "delivery_location", "payment_terms",
  55. "evaluation_method", "scope_of_work"
  56. ]
  57. # Bidding domain specific prompts
  58. BID_EXTRACTION_PROMPT = """你是一个招投标领域的专家。请根据提供的文档内容,提取以下字段信息:
  59. {fields}
  60. 文档内容:
  61. {context}
  62. 请以JSON格式返回提取结果。如果某个字段无法从文档中提取,请返回null。
  63. """
  64. # Logging settings
  65. LOG_LEVEL = "INFO"
  66. LOG_FORMAT = "<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>"