loader.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
# coding=utf-8
"""
Configuration loading module.

Loads settings from the YAML configuration file and from environment
variables.
"""
  6. import os
  7. from pathlib import Path
  8. from typing import Dict, Any, Optional
  9. import yaml
  10. from .config import parse_multi_account_config, validate_paired_configs
  11. def _get_env_bool(key: str, default: bool = False) -> Optional[bool]:
  12. """从环境变量获取布尔值,如果未设置返回 None"""
  13. value = os.environ.get(key, "").strip().lower()
  14. if not value:
  15. return None
  16. return value in ("true", "1")
  17. def _get_env_int(key: str, default: int = 0) -> int:
  18. """从环境变量获取整数值"""
  19. value = os.environ.get(key, "").strip()
  20. if not value:
  21. return default
  22. try:
  23. return int(value)
  24. except ValueError:
  25. return default
  26. def _get_env_int_or_none(key: str) -> Optional[int]:
  27. """从环境变量获取整数值,未设置时返回 None"""
  28. value = os.environ.get(key, "").strip()
  29. if not value:
  30. return None
  31. try:
  32. return int(value)
  33. except ValueError:
  34. return None
  35. def _get_env_str(key: str, default: str = "") -> str:
  36. """从环境变量获取字符串值"""
  37. return os.environ.get(key, "").strip() or default
  38. def _load_app_config(config_data: Dict) -> Dict:
  39. """加载应用配置"""
  40. app_config = config_data.get("app", {})
  41. advanced = config_data.get("advanced", {})
  42. return {
  43. "VERSION_CHECK_URL": advanced.get("version_check_url", ""),
  44. "SHOW_VERSION_UPDATE": app_config.get("show_version_update", True),
  45. "TIMEZONE": _get_env_str("TIMEZONE") or app_config.get("timezone", "Asia/Shanghai"),
  46. "DEBUG": _get_env_bool("DEBUG") if _get_env_bool("DEBUG") is not None else advanced.get("debug", False),
  47. }
  48. def _load_crawler_config(config_data: Dict) -> Dict:
  49. """加载爬虫配置"""
  50. advanced = config_data.get("advanced", {})
  51. crawler_config = advanced.get("crawler", {})
  52. enable_crawler_env = _get_env_bool("ENABLE_CRAWLER")
  53. return {
  54. "REQUEST_INTERVAL": crawler_config.get("request_interval", 100),
  55. "USE_PROXY": crawler_config.get("use_proxy", False),
  56. "DEFAULT_PROXY": crawler_config.get("default_proxy", ""),
  57. "ENABLE_CRAWLER": enable_crawler_env if enable_crawler_env is not None else crawler_config.get("enabled", True),
  58. }
  59. def _load_report_config(config_data: Dict) -> Dict:
  60. """加载报告配置"""
  61. report_config = config_data.get("report", {})
  62. # 环境变量覆盖
  63. sort_by_position_env = _get_env_bool("SORT_BY_POSITION_FIRST")
  64. reverse_content_env = _get_env_bool("REVERSE_CONTENT_ORDER")
  65. max_news_env = _get_env_int("MAX_NEWS_PER_KEYWORD")
  66. display_mode_env = _get_env_str("DISPLAY_MODE")
  67. return {
  68. "REPORT_MODE": _get_env_str("REPORT_MODE") or report_config.get("mode", "daily"),
  69. "DISPLAY_MODE": display_mode_env or report_config.get("display_mode", "keyword"),
  70. "RANK_THRESHOLD": report_config.get("rank_threshold", 10),
  71. "SORT_BY_POSITION_FIRST": sort_by_position_env if sort_by_position_env is not None else report_config.get("sort_by_position_first", False),
  72. "MAX_NEWS_PER_KEYWORD": max_news_env or report_config.get("max_news_per_keyword", 0),
  73. "REVERSE_CONTENT_ORDER": reverse_content_env if reverse_content_env is not None else report_config.get("reverse_content_order", False),
  74. }
  75. def _load_notification_config(config_data: Dict) -> Dict:
  76. """加载通知配置"""
  77. notification = config_data.get("notification", {})
  78. advanced = config_data.get("advanced", {})
  79. batch_size = advanced.get("batch_size", {})
  80. enable_notification_env = _get_env_bool("ENABLE_NOTIFICATION")
  81. return {
  82. "ENABLE_NOTIFICATION": enable_notification_env if enable_notification_env is not None else notification.get("enabled", True),
  83. "MESSAGE_BATCH_SIZE": batch_size.get("default", 4000),
  84. "DINGTALK_BATCH_SIZE": batch_size.get("dingtalk", 20000),
  85. "FEISHU_BATCH_SIZE": batch_size.get("feishu", 29000),
  86. "BARK_BATCH_SIZE": batch_size.get("bark", 3600),
  87. "SLACK_BATCH_SIZE": batch_size.get("slack", 4000),
  88. "BATCH_SEND_INTERVAL": advanced.get("batch_send_interval", 1.0),
  89. "FEISHU_MESSAGE_SEPARATOR": advanced.get("feishu_message_separator", "---"),
  90. "MAX_ACCOUNTS_PER_CHANNEL": _get_env_int("MAX_ACCOUNTS_PER_CHANNEL") or advanced.get("max_accounts_per_channel", 3),
  91. }
  92. def _load_push_window_config(config_data: Dict) -> Dict:
  93. """加载推送窗口配置"""
  94. notification = config_data.get("notification", {})
  95. push_window = notification.get("push_window", {})
  96. enabled_env = _get_env_bool("PUSH_WINDOW_ENABLED")
  97. once_per_day_env = _get_env_bool("PUSH_WINDOW_ONCE_PER_DAY")
  98. return {
  99. "ENABLED": enabled_env if enabled_env is not None else push_window.get("enabled", False),
  100. "TIME_RANGE": {
  101. "START": _get_env_str("PUSH_WINDOW_START") or push_window.get("start", "08:00"),
  102. "END": _get_env_str("PUSH_WINDOW_END") or push_window.get("end", "22:00"),
  103. },
  104. "ONCE_PER_DAY": once_per_day_env if once_per_day_env is not None else push_window.get("once_per_day", True),
  105. }
  106. def _load_weight_config(config_data: Dict) -> Dict:
  107. """加载权重配置"""
  108. advanced = config_data.get("advanced", {})
  109. weight = advanced.get("weight", {})
  110. return {
  111. "RANK_WEIGHT": weight.get("rank", 0.6),
  112. "FREQUENCY_WEIGHT": weight.get("frequency", 0.3),
  113. "HOTNESS_WEIGHT": weight.get("hotness", 0.1),
  114. }
  115. def _load_rss_config(config_data: Dict) -> Dict:
  116. """加载 RSS 配置"""
  117. rss = config_data.get("rss", {})
  118. advanced = config_data.get("advanced", {})
  119. advanced_rss = advanced.get("rss", {})
  120. advanced_crawler = advanced.get("crawler", {})
  121. # RSS 代理配置:优先使用 RSS 专属代理,否则复用 crawler 的 default_proxy
  122. rss_proxy_url = advanced_rss.get("proxy_url", "") or advanced_crawler.get("default_proxy", "")
  123. # 新鲜度过滤配置
  124. freshness_filter = rss.get("freshness_filter", {})
  125. # 验证并设置 max_age_days 默认值
  126. raw_max_age = freshness_filter.get("max_age_days", 3)
  127. try:
  128. max_age_days = int(raw_max_age)
  129. if max_age_days < 0:
  130. print(f"[警告] RSS freshness_filter.max_age_days 为负数 ({max_age_days}),使用默认值 3")
  131. max_age_days = 3
  132. except (ValueError, TypeError):
  133. print(f"[警告] RSS freshness_filter.max_age_days 格式错误 ({raw_max_age}),使用默认值 3")
  134. max_age_days = 3
  135. # RSS 配置直接从 config.yaml 读取,不再支持环境变量
  136. return {
  137. "ENABLED": rss.get("enabled", False),
  138. "REQUEST_INTERVAL": advanced_rss.get("request_interval", 2000),
  139. "TIMEOUT": advanced_rss.get("timeout", 15),
  140. "USE_PROXY": advanced_rss.get("use_proxy", False),
  141. "PROXY_URL": rss_proxy_url,
  142. "FEEDS": rss.get("feeds", []),
  143. "FRESHNESS_FILTER": {
  144. "ENABLED": freshness_filter.get("enabled", True), # 默认启用
  145. "MAX_AGE_DAYS": max_age_days,
  146. },
  147. "NOTIFICATION": {
  148. "ENABLED": advanced_rss.get("notification_enabled", False),
  149. },
  150. }
  151. def _load_standalone_display_config(config_data: Dict) -> Dict:
  152. """加载独立展示区配置"""
  153. notification = config_data.get("notification", {})
  154. standalone = notification.get("standalone_display", {})
  155. return {
  156. "ENABLED": standalone.get("enabled", False),
  157. "PLATFORMS": standalone.get("platforms", []),
  158. "RSS_FEEDS": standalone.get("rss_feeds", []),
  159. "MAX_ITEMS": standalone.get("max_items", 20),
  160. }
  161. def _load_ai_analysis_config(config_data: Dict) -> Dict:
  162. """加载 AI 分析配置"""
  163. ai_config = config_data.get("ai_analysis", {})
  164. enabled_env = _get_env_bool("AI_ANALYSIS_ENABLED")
  165. timeout_env = _get_env_int_or_none("AI_TIMEOUT")
  166. return {
  167. "ENABLED": enabled_env if enabled_env is not None else ai_config.get("enabled", False),
  168. "PROVIDER": _get_env_str("AI_PROVIDER") or ai_config.get("provider", "deepseek"),
  169. "API_KEY": _get_env_str("AI_API_KEY") or ai_config.get("api_key", ""),
  170. "MODEL": _get_env_str("AI_MODEL") or ai_config.get("model", "deepseek-chat"),
  171. "BASE_URL": _get_env_str("AI_BASE_URL") or ai_config.get("base_url", ""),
  172. "TIMEOUT": timeout_env if timeout_env is not None else ai_config.get("timeout", 90),
  173. "PUSH_MODE": _get_env_str("AI_PUSH_MODE") or ai_config.get("push_mode", "both"),
  174. "MAX_NEWS_FOR_ANALYSIS": ai_config.get("max_news_for_analysis", 50),
  175. "INCLUDE_RSS": ai_config.get("include_rss", True),
  176. "PROMPT_FILE": ai_config.get("prompt_file", "ai_analysis_prompt.txt"),
  177. }
  178. def _load_storage_config(config_data: Dict) -> Dict:
  179. """加载存储配置"""
  180. storage = config_data.get("storage", {})
  181. formats = storage.get("formats", {})
  182. local = storage.get("local", {})
  183. remote = storage.get("remote", {})
  184. pull = storage.get("pull", {})
  185. txt_enabled_env = _get_env_bool("STORAGE_TXT_ENABLED")
  186. html_enabled_env = _get_env_bool("STORAGE_HTML_ENABLED")
  187. pull_enabled_env = _get_env_bool("PULL_ENABLED")
  188. return {
  189. "BACKEND": _get_env_str("STORAGE_BACKEND") or storage.get("backend", "auto"),
  190. "FORMATS": {
  191. "SQLITE": formats.get("sqlite", True),
  192. "TXT": txt_enabled_env if txt_enabled_env is not None else formats.get("txt", True),
  193. "HTML": html_enabled_env if html_enabled_env is not None else formats.get("html", True),
  194. },
  195. "LOCAL": {
  196. "DATA_DIR": local.get("data_dir", "output"),
  197. "RETENTION_DAYS": _get_env_int("LOCAL_RETENTION_DAYS") or local.get("retention_days", 0),
  198. },
  199. "REMOTE": {
  200. "ENDPOINT_URL": _get_env_str("S3_ENDPOINT_URL") or remote.get("endpoint_url", ""),
  201. "BUCKET_NAME": _get_env_str("S3_BUCKET_NAME") or remote.get("bucket_name", ""),
  202. "ACCESS_KEY_ID": _get_env_str("S3_ACCESS_KEY_ID") or remote.get("access_key_id", ""),
  203. "SECRET_ACCESS_KEY": _get_env_str("S3_SECRET_ACCESS_KEY") or remote.get("secret_access_key", ""),
  204. "REGION": _get_env_str("S3_REGION") or remote.get("region", ""),
  205. "RETENTION_DAYS": _get_env_int("REMOTE_RETENTION_DAYS") or remote.get("retention_days", 0),
  206. },
  207. "PULL": {
  208. "ENABLED": pull_enabled_env if pull_enabled_env is not None else pull.get("enabled", False),
  209. "DAYS": _get_env_int("PULL_DAYS") or pull.get("days", 7),
  210. },
  211. }
  212. def _load_webhook_config(config_data: Dict) -> Dict:
  213. """加载 Webhook 配置"""
  214. notification = config_data.get("notification", {})
  215. channels = notification.get("channels", {})
  216. # 各渠道配置
  217. feishu = channels.get("feishu", {})
  218. dingtalk = channels.get("dingtalk", {})
  219. wework = channels.get("wework", {})
  220. telegram = channels.get("telegram", {})
  221. email = channels.get("email", {})
  222. ntfy = channels.get("ntfy", {})
  223. bark = channels.get("bark", {})
  224. slack = channels.get("slack", {})
  225. generic = channels.get("generic_webhook", {})
  226. return {
  227. # 飞书
  228. "FEISHU_WEBHOOK_URL": _get_env_str("FEISHU_WEBHOOK_URL") or feishu.get("webhook_url", ""),
  229. # 钉钉
  230. "DINGTALK_WEBHOOK_URL": _get_env_str("DINGTALK_WEBHOOK_URL") or dingtalk.get("webhook_url", ""),
  231. # 企业微信
  232. "WEWORK_WEBHOOK_URL": _get_env_str("WEWORK_WEBHOOK_URL") or wework.get("webhook_url", ""),
  233. "WEWORK_MSG_TYPE": _get_env_str("WEWORK_MSG_TYPE") or wework.get("msg_type", "markdown"),
  234. # Telegram
  235. "TELEGRAM_BOT_TOKEN": _get_env_str("TELEGRAM_BOT_TOKEN") or telegram.get("bot_token", ""),
  236. "TELEGRAM_CHAT_ID": _get_env_str("TELEGRAM_CHAT_ID") or telegram.get("chat_id", ""),
  237. # 邮件
  238. "EMAIL_FROM": _get_env_str("EMAIL_FROM") or email.get("from", ""),
  239. "EMAIL_PASSWORD": _get_env_str("EMAIL_PASSWORD") or email.get("password", ""),
  240. "EMAIL_TO": _get_env_str("EMAIL_TO") or email.get("to", ""),
  241. "EMAIL_SMTP_SERVER": _get_env_str("EMAIL_SMTP_SERVER") or email.get("smtp_server", ""),
  242. "EMAIL_SMTP_PORT": _get_env_str("EMAIL_SMTP_PORT") or email.get("smtp_port", ""),
  243. # ntfy
  244. "NTFY_SERVER_URL": _get_env_str("NTFY_SERVER_URL") or ntfy.get("server_url") or "https://ntfy.sh",
  245. "NTFY_TOPIC": _get_env_str("NTFY_TOPIC") or ntfy.get("topic", ""),
  246. "NTFY_TOKEN": _get_env_str("NTFY_TOKEN") or ntfy.get("token", ""),
  247. # Bark
  248. "BARK_URL": _get_env_str("BARK_URL") or bark.get("url", ""),
  249. # Slack
  250. "SLACK_WEBHOOK_URL": _get_env_str("SLACK_WEBHOOK_URL") or slack.get("webhook_url", ""),
  251. # 通用 Webhook
  252. "GENERIC_WEBHOOK_URL": _get_env_str("GENERIC_WEBHOOK_URL") or generic.get("url", ""),
  253. "GENERIC_WEBHOOK_TEMPLATE": _get_env_str("GENERIC_WEBHOOK_TEMPLATE") or generic.get("template", ""),
  254. }
  255. def _print_notification_sources(config: Dict) -> None:
  256. """打印通知渠道配置来源信息"""
  257. notification_sources = []
  258. max_accounts = config["MAX_ACCOUNTS_PER_CHANNEL"]
  259. if config["FEISHU_WEBHOOK_URL"]:
  260. accounts = parse_multi_account_config(config["FEISHU_WEBHOOK_URL"])
  261. count = min(len(accounts), max_accounts)
  262. source = "环境变量" if os.environ.get("FEISHU_WEBHOOK_URL") else "配置文件"
  263. notification_sources.append(f"飞书({source}, {count}个账号)")
  264. if config["DINGTALK_WEBHOOK_URL"]:
  265. accounts = parse_multi_account_config(config["DINGTALK_WEBHOOK_URL"])
  266. count = min(len(accounts), max_accounts)
  267. source = "环境变量" if os.environ.get("DINGTALK_WEBHOOK_URL") else "配置文件"
  268. notification_sources.append(f"钉钉({source}, {count}个账号)")
  269. if config["WEWORK_WEBHOOK_URL"]:
  270. accounts = parse_multi_account_config(config["WEWORK_WEBHOOK_URL"])
  271. count = min(len(accounts), max_accounts)
  272. source = "环境变量" if os.environ.get("WEWORK_WEBHOOK_URL") else "配置文件"
  273. notification_sources.append(f"企业微信({source}, {count}个账号)")
  274. if config["TELEGRAM_BOT_TOKEN"] and config["TELEGRAM_CHAT_ID"]:
  275. tokens = parse_multi_account_config(config["TELEGRAM_BOT_TOKEN"])
  276. chat_ids = parse_multi_account_config(config["TELEGRAM_CHAT_ID"])
  277. valid, count = validate_paired_configs(
  278. {"bot_token": tokens, "chat_id": chat_ids},
  279. "Telegram",
  280. required_keys=["bot_token", "chat_id"]
  281. )
  282. if valid and count > 0:
  283. count = min(count, max_accounts)
  284. token_source = "环境变量" if os.environ.get("TELEGRAM_BOT_TOKEN") else "配置文件"
  285. notification_sources.append(f"Telegram({token_source}, {count}个账号)")
  286. if config["EMAIL_FROM"] and config["EMAIL_PASSWORD"] and config["EMAIL_TO"]:
  287. from_source = "环境变量" if os.environ.get("EMAIL_FROM") else "配置文件"
  288. notification_sources.append(f"邮件({from_source})")
  289. if config["NTFY_SERVER_URL"] and config["NTFY_TOPIC"]:
  290. topics = parse_multi_account_config(config["NTFY_TOPIC"])
  291. tokens = parse_multi_account_config(config["NTFY_TOKEN"])
  292. if tokens:
  293. valid, count = validate_paired_configs(
  294. {"topic": topics, "token": tokens},
  295. "ntfy"
  296. )
  297. if valid and count > 0:
  298. count = min(count, max_accounts)
  299. server_source = "环境变量" if os.environ.get("NTFY_SERVER_URL") else "配置文件"
  300. notification_sources.append(f"ntfy({server_source}, {count}个账号)")
  301. else:
  302. count = min(len(topics), max_accounts)
  303. server_source = "环境变量" if os.environ.get("NTFY_SERVER_URL") else "配置文件"
  304. notification_sources.append(f"ntfy({server_source}, {count}个账号)")
  305. if config["BARK_URL"]:
  306. accounts = parse_multi_account_config(config["BARK_URL"])
  307. count = min(len(accounts), max_accounts)
  308. bark_source = "环境变量" if os.environ.get("BARK_URL") else "配置文件"
  309. notification_sources.append(f"Bark({bark_source}, {count}个账号)")
  310. if config["SLACK_WEBHOOK_URL"]:
  311. accounts = parse_multi_account_config(config["SLACK_WEBHOOK_URL"])
  312. count = min(len(accounts), max_accounts)
  313. slack_source = "环境变量" if os.environ.get("SLACK_WEBHOOK_URL") else "配置文件"
  314. notification_sources.append(f"Slack({slack_source}, {count}个账号)")
  315. if config.get("GENERIC_WEBHOOK_URL"):
  316. accounts = parse_multi_account_config(config["GENERIC_WEBHOOK_URL"])
  317. count = min(len(accounts), max_accounts)
  318. source = "环境变量" if os.environ.get("GENERIC_WEBHOOK_URL") else "配置文件"
  319. notification_sources.append(f"通用Webhook({source}, {count}个账号)")
  320. if notification_sources:
  321. print(f"通知渠道配置来源: {', '.join(notification_sources)}")
  322. print(f"每个渠道最大账号数: {max_accounts}")
  323. else:
  324. print("未配置任何通知渠道")
  325. def load_config(config_path: Optional[str] = None) -> Dict[str, Any]:
  326. """
  327. 加载配置文件
  328. Args:
  329. config_path: 配置文件路径,默认从环境变量 CONFIG_PATH 获取或使用 config/config.yaml
  330. Returns:
  331. 包含所有配置的字典
  332. Raises:
  333. FileNotFoundError: 配置文件不存在
  334. """
  335. if config_path is None:
  336. config_path = os.environ.get("CONFIG_PATH", "config/config.yaml")
  337. if not Path(config_path).exists():
  338. raise FileNotFoundError(f"配置文件 {config_path} 不存在")
  339. with open(config_path, "r", encoding="utf-8") as f:
  340. config_data = yaml.safe_load(f)
  341. print(f"配置文件加载成功: {config_path}")
  342. # 合并所有配置
  343. config = {}
  344. # 应用配置
  345. config.update(_load_app_config(config_data))
  346. # 爬虫配置
  347. config.update(_load_crawler_config(config_data))
  348. # 报告配置
  349. config.update(_load_report_config(config_data))
  350. # 通知配置
  351. config.update(_load_notification_config(config_data))
  352. # 推送窗口配置
  353. config["PUSH_WINDOW"] = _load_push_window_config(config_data)
  354. # 权重配置
  355. config["WEIGHT_CONFIG"] = _load_weight_config(config_data)
  356. # 平台配置
  357. config["PLATFORMS"] = config_data.get("platforms", [])
  358. # RSS 配置
  359. config["RSS"] = _load_rss_config(config_data)
  360. # AI 分析配置
  361. config["AI_ANALYSIS"] = _load_ai_analysis_config(config_data)
  362. # 独立展示区配置
  363. config["STANDALONE_DISPLAY"] = _load_standalone_display_config(config_data)
  364. # 存储配置
  365. config["STORAGE"] = _load_storage_config(config_data)
  366. # Webhook 配置
  367. config.update(_load_webhook_config(config_data))
  368. # 打印通知渠道配置来源
  369. _print_notification_sources(config)
  370. return config