loader.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. # coding=utf-8
  2. """
  3. 配置加载模块
  4. 负责从 YAML 配置文件和环境变量加载配置。
  5. """
  6. import os
  7. from pathlib import Path
  8. from typing import Dict, Any, Optional
  9. import yaml
  10. from .config import parse_multi_account_config, validate_paired_configs
  11. def _get_env_bool(key: str, default: bool = False) -> Optional[bool]:
  12. """从环境变量获取布尔值,如果未设置返回 None"""
  13. value = os.environ.get(key, "").strip().lower()
  14. if not value:
  15. return None
  16. return value in ("true", "1")
  17. def _get_env_int(key: str, default: int = 0) -> int:
  18. """从环境变量获取整数值"""
  19. value = os.environ.get(key, "").strip()
  20. if not value:
  21. return default
  22. try:
  23. return int(value)
  24. except ValueError:
  25. return default
  26. def _get_env_str(key: str, default: str = "") -> str:
  27. """从环境变量获取字符串值"""
  28. return os.environ.get(key, "").strip() or default
  29. def _load_app_config(config_data: Dict) -> Dict:
  30. """加载应用配置"""
  31. app_config = config_data.get("app", {})
  32. return {
  33. "VERSION_CHECK_URL": app_config.get("version_check_url", ""),
  34. "SHOW_VERSION_UPDATE": app_config.get("show_version_update", True),
  35. "TIMEZONE": _get_env_str("TIMEZONE") or app_config.get("timezone", "Asia/Shanghai"),
  36. }
  37. def _load_crawler_config(config_data: Dict) -> Dict:
  38. """加载爬虫配置"""
  39. crawler_config = config_data.get("crawler", {})
  40. enable_crawler_env = _get_env_bool("ENABLE_CRAWLER")
  41. return {
  42. "REQUEST_INTERVAL": crawler_config.get("request_interval", 100),
  43. "USE_PROXY": crawler_config.get("use_proxy", False),
  44. "DEFAULT_PROXY": crawler_config.get("default_proxy", ""),
  45. "ENABLE_CRAWLER": enable_crawler_env if enable_crawler_env is not None else crawler_config.get("enable_crawler", True),
  46. }
  47. def _load_report_config(config_data: Dict) -> Dict:
  48. """加载报告配置"""
  49. report_config = config_data.get("report", {})
  50. # 环境变量覆盖
  51. sort_by_position_env = _get_env_bool("SORT_BY_POSITION_FIRST")
  52. reverse_content_env = _get_env_bool("REVERSE_CONTENT_ORDER")
  53. max_news_env = _get_env_int("MAX_NEWS_PER_KEYWORD")
  54. return {
  55. "REPORT_MODE": _get_env_str("REPORT_MODE") or report_config.get("mode", "daily"),
  56. "RANK_THRESHOLD": report_config.get("rank_threshold", 10),
  57. "SORT_BY_POSITION_FIRST": sort_by_position_env if sort_by_position_env is not None else report_config.get("sort_by_position_first", False),
  58. "MAX_NEWS_PER_KEYWORD": max_news_env or report_config.get("max_news_per_keyword", 0),
  59. "REVERSE_CONTENT_ORDER": reverse_content_env if reverse_content_env is not None else report_config.get("reverse_content_order", False),
  60. }
  61. def _load_notification_config(config_data: Dict) -> Dict:
  62. """加载通知配置"""
  63. notification = config_data.get("notification", {})
  64. enable_notification_env = _get_env_bool("ENABLE_NOTIFICATION")
  65. return {
  66. "ENABLE_NOTIFICATION": enable_notification_env if enable_notification_env is not None else notification.get("enable_notification", True),
  67. "MESSAGE_BATCH_SIZE": notification.get("message_batch_size", 4000),
  68. "DINGTALK_BATCH_SIZE": notification.get("dingtalk_batch_size", 20000),
  69. "FEISHU_BATCH_SIZE": notification.get("feishu_batch_size", 29000),
  70. "BARK_BATCH_SIZE": notification.get("bark_batch_size", 3600),
  71. "SLACK_BATCH_SIZE": notification.get("slack_batch_size", 4000),
  72. "BATCH_SEND_INTERVAL": notification.get("batch_send_interval", 1.0),
  73. "FEISHU_MESSAGE_SEPARATOR": notification.get("feishu_message_separator", "---"),
  74. "MAX_ACCOUNTS_PER_CHANNEL": _get_env_int("MAX_ACCOUNTS_PER_CHANNEL") or notification.get("max_accounts_per_channel", 3),
  75. }
  76. def _load_push_window_config(config_data: Dict) -> Dict:
  77. """加载推送窗口配置"""
  78. notification = config_data.get("notification", {})
  79. push_window = notification.get("push_window", {})
  80. time_range = push_window.get("time_range", {})
  81. enabled_env = _get_env_bool("PUSH_WINDOW_ENABLED")
  82. once_per_day_env = _get_env_bool("PUSH_WINDOW_ONCE_PER_DAY")
  83. return {
  84. "ENABLED": enabled_env if enabled_env is not None else push_window.get("enabled", False),
  85. "TIME_RANGE": {
  86. "START": _get_env_str("PUSH_WINDOW_START") or time_range.get("start", "08:00"),
  87. "END": _get_env_str("PUSH_WINDOW_END") or time_range.get("end", "22:00"),
  88. },
  89. "ONCE_PER_DAY": once_per_day_env if once_per_day_env is not None else push_window.get("once_per_day", True),
  90. }
  91. def _load_weight_config(config_data: Dict) -> Dict:
  92. """加载权重配置"""
  93. weight = config_data.get("weight", {})
  94. return {
  95. "RANK_WEIGHT": weight.get("rank_weight", 1.0),
  96. "FREQUENCY_WEIGHT": weight.get("frequency_weight", 1.0),
  97. "HOTNESS_WEIGHT": weight.get("hotness_weight", 1.0),
  98. }
  99. def _load_storage_config(config_data: Dict) -> Dict:
  100. """加载存储配置"""
  101. storage = config_data.get("storage", {})
  102. formats = storage.get("formats", {})
  103. local = storage.get("local", {})
  104. remote = storage.get("remote", {})
  105. pull = storage.get("pull", {})
  106. txt_enabled_env = _get_env_bool("STORAGE_TXT_ENABLED")
  107. html_enabled_env = _get_env_bool("STORAGE_HTML_ENABLED")
  108. pull_enabled_env = _get_env_bool("PULL_ENABLED")
  109. return {
  110. "BACKEND": _get_env_str("STORAGE_BACKEND") or storage.get("backend", "auto"),
  111. "FORMATS": {
  112. "SQLITE": formats.get("sqlite", True),
  113. "TXT": txt_enabled_env if txt_enabled_env is not None else formats.get("txt", True),
  114. "HTML": html_enabled_env if html_enabled_env is not None else formats.get("html", True),
  115. },
  116. "LOCAL": {
  117. "DATA_DIR": local.get("data_dir", "output"),
  118. "RETENTION_DAYS": _get_env_int("LOCAL_RETENTION_DAYS") or local.get("retention_days", 0),
  119. },
  120. "REMOTE": {
  121. "ENDPOINT_URL": _get_env_str("S3_ENDPOINT_URL") or remote.get("endpoint_url", ""),
  122. "BUCKET_NAME": _get_env_str("S3_BUCKET_NAME") or remote.get("bucket_name", ""),
  123. "ACCESS_KEY_ID": _get_env_str("S3_ACCESS_KEY_ID") or remote.get("access_key_id", ""),
  124. "SECRET_ACCESS_KEY": _get_env_str("S3_SECRET_ACCESS_KEY") or remote.get("secret_access_key", ""),
  125. "REGION": _get_env_str("S3_REGION") or remote.get("region", ""),
  126. "RETENTION_DAYS": _get_env_int("REMOTE_RETENTION_DAYS") or remote.get("retention_days", 0),
  127. },
  128. "PULL": {
  129. "ENABLED": pull_enabled_env if pull_enabled_env is not None else pull.get("enabled", False),
  130. "DAYS": _get_env_int("PULL_DAYS") or pull.get("days", 7),
  131. },
  132. }
  133. def _load_webhook_config(config_data: Dict) -> Dict:
  134. """加载 Webhook 配置"""
  135. notification = config_data.get("notification", {})
  136. webhooks = notification.get("webhooks", {})
  137. return {
  138. # 飞书
  139. "FEISHU_WEBHOOK_URL": _get_env_str("FEISHU_WEBHOOK_URL") or webhooks.get("feishu_url", ""),
  140. # 钉钉
  141. "DINGTALK_WEBHOOK_URL": _get_env_str("DINGTALK_WEBHOOK_URL") or webhooks.get("dingtalk_url", ""),
  142. # 企业微信
  143. "WEWORK_WEBHOOK_URL": _get_env_str("WEWORK_WEBHOOK_URL") or webhooks.get("wework_url", ""),
  144. "WEWORK_MSG_TYPE": _get_env_str("WEWORK_MSG_TYPE") or webhooks.get("wework_msg_type", "markdown"),
  145. # Telegram
  146. "TELEGRAM_BOT_TOKEN": _get_env_str("TELEGRAM_BOT_TOKEN") or webhooks.get("telegram_bot_token", ""),
  147. "TELEGRAM_CHAT_ID": _get_env_str("TELEGRAM_CHAT_ID") or webhooks.get("telegram_chat_id", ""),
  148. # 邮件
  149. "EMAIL_FROM": _get_env_str("EMAIL_FROM") or webhooks.get("email_from", ""),
  150. "EMAIL_PASSWORD": _get_env_str("EMAIL_PASSWORD") or webhooks.get("email_password", ""),
  151. "EMAIL_TO": _get_env_str("EMAIL_TO") or webhooks.get("email_to", ""),
  152. "EMAIL_SMTP_SERVER": _get_env_str("EMAIL_SMTP_SERVER") or webhooks.get("email_smtp_server", ""),
  153. "EMAIL_SMTP_PORT": _get_env_str("EMAIL_SMTP_PORT") or webhooks.get("email_smtp_port", ""),
  154. # ntfy
  155. "NTFY_SERVER_URL": _get_env_str("NTFY_SERVER_URL") or webhooks.get("ntfy_server_url") or "https://ntfy.sh",
  156. "NTFY_TOPIC": _get_env_str("NTFY_TOPIC") or webhooks.get("ntfy_topic", ""),
  157. "NTFY_TOKEN": _get_env_str("NTFY_TOKEN") or webhooks.get("ntfy_token", ""),
  158. # Bark
  159. "BARK_URL": _get_env_str("BARK_URL") or webhooks.get("bark_url", ""),
  160. # Slack
  161. "SLACK_WEBHOOK_URL": _get_env_str("SLACK_WEBHOOK_URL") or webhooks.get("slack_webhook_url", ""),
  162. }
  163. def _print_notification_sources(config: Dict) -> None:
  164. """打印通知渠道配置来源信息"""
  165. notification_sources = []
  166. max_accounts = config["MAX_ACCOUNTS_PER_CHANNEL"]
  167. if config["FEISHU_WEBHOOK_URL"]:
  168. accounts = parse_multi_account_config(config["FEISHU_WEBHOOK_URL"])
  169. count = min(len(accounts), max_accounts)
  170. source = "环境变量" if os.environ.get("FEISHU_WEBHOOK_URL") else "配置文件"
  171. notification_sources.append(f"飞书({source}, {count}个账号)")
  172. if config["DINGTALK_WEBHOOK_URL"]:
  173. accounts = parse_multi_account_config(config["DINGTALK_WEBHOOK_URL"])
  174. count = min(len(accounts), max_accounts)
  175. source = "环境变量" if os.environ.get("DINGTALK_WEBHOOK_URL") else "配置文件"
  176. notification_sources.append(f"钉钉({source}, {count}个账号)")
  177. if config["WEWORK_WEBHOOK_URL"]:
  178. accounts = parse_multi_account_config(config["WEWORK_WEBHOOK_URL"])
  179. count = min(len(accounts), max_accounts)
  180. source = "环境变量" if os.environ.get("WEWORK_WEBHOOK_URL") else "配置文件"
  181. notification_sources.append(f"企业微信({source}, {count}个账号)")
  182. if config["TELEGRAM_BOT_TOKEN"] and config["TELEGRAM_CHAT_ID"]:
  183. tokens = parse_multi_account_config(config["TELEGRAM_BOT_TOKEN"])
  184. chat_ids = parse_multi_account_config(config["TELEGRAM_CHAT_ID"])
  185. valid, count = validate_paired_configs(
  186. {"bot_token": tokens, "chat_id": chat_ids},
  187. "Telegram",
  188. required_keys=["bot_token", "chat_id"]
  189. )
  190. if valid and count > 0:
  191. count = min(count, max_accounts)
  192. token_source = "环境变量" if os.environ.get("TELEGRAM_BOT_TOKEN") else "配置文件"
  193. notification_sources.append(f"Telegram({token_source}, {count}个账号)")
  194. if config["EMAIL_FROM"] and config["EMAIL_PASSWORD"] and config["EMAIL_TO"]:
  195. from_source = "环境变量" if os.environ.get("EMAIL_FROM") else "配置文件"
  196. notification_sources.append(f"邮件({from_source})")
  197. if config["NTFY_SERVER_URL"] and config["NTFY_TOPIC"]:
  198. topics = parse_multi_account_config(config["NTFY_TOPIC"])
  199. tokens = parse_multi_account_config(config["NTFY_TOKEN"])
  200. if tokens:
  201. valid, count = validate_paired_configs(
  202. {"topic": topics, "token": tokens},
  203. "ntfy"
  204. )
  205. if valid and count > 0:
  206. count = min(count, max_accounts)
  207. server_source = "环境变量" if os.environ.get("NTFY_SERVER_URL") else "配置文件"
  208. notification_sources.append(f"ntfy({server_source}, {count}个账号)")
  209. else:
  210. count = min(len(topics), max_accounts)
  211. server_source = "环境变量" if os.environ.get("NTFY_SERVER_URL") else "配置文件"
  212. notification_sources.append(f"ntfy({server_source}, {count}个账号)")
  213. if config["BARK_URL"]:
  214. accounts = parse_multi_account_config(config["BARK_URL"])
  215. count = min(len(accounts), max_accounts)
  216. bark_source = "环境变量" if os.environ.get("BARK_URL") else "配置文件"
  217. notification_sources.append(f"Bark({bark_source}, {count}个账号)")
  218. if config["SLACK_WEBHOOK_URL"]:
  219. accounts = parse_multi_account_config(config["SLACK_WEBHOOK_URL"])
  220. count = min(len(accounts), max_accounts)
  221. slack_source = "环境变量" if os.environ.get("SLACK_WEBHOOK_URL") else "配置文件"
  222. notification_sources.append(f"Slack({slack_source}, {count}个账号)")
  223. if notification_sources:
  224. print(f"通知渠道配置来源: {', '.join(notification_sources)}")
  225. print(f"每个渠道最大账号数: {max_accounts}")
  226. else:
  227. print("未配置任何通知渠道")
  228. def load_config(config_path: Optional[str] = None) -> Dict[str, Any]:
  229. """
  230. 加载配置文件
  231. Args:
  232. config_path: 配置文件路径,默认从环境变量 CONFIG_PATH 获取或使用 config/config.yaml
  233. Returns:
  234. 包含所有配置的字典
  235. Raises:
  236. FileNotFoundError: 配置文件不存在
  237. """
  238. if config_path is None:
  239. config_path = os.environ.get("CONFIG_PATH", "config/config.yaml")
  240. if not Path(config_path).exists():
  241. raise FileNotFoundError(f"配置文件 {config_path} 不存在")
  242. with open(config_path, "r", encoding="utf-8") as f:
  243. config_data = yaml.safe_load(f)
  244. print(f"配置文件加载成功: {config_path}")
  245. # 合并所有配置
  246. config = {}
  247. # 应用配置
  248. config.update(_load_app_config(config_data))
  249. # 爬虫配置
  250. config.update(_load_crawler_config(config_data))
  251. # 报告配置
  252. config.update(_load_report_config(config_data))
  253. # 通知配置
  254. config.update(_load_notification_config(config_data))
  255. # 推送窗口配置
  256. config["PUSH_WINDOW"] = _load_push_window_config(config_data)
  257. # 权重配置
  258. config["WEIGHT_CONFIG"] = _load_weight_config(config_data)
  259. # 平台配置
  260. config["PLATFORMS"] = config_data.get("platforms", [])
  261. # 存储配置
  262. config["STORAGE"] = _load_storage_config(config_data)
  263. # Webhook 配置
  264. config.update(_load_webhook_config(config_data))
  265. # 打印通知渠道配置来源
  266. _print_notification_sources(config)
  267. return config