loader.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611
  1. # coding=utf-8
  2. """
  3. 配置加载模块
  4. 负责从 YAML 配置文件和环境变量加载配置。
  5. """
  6. import os
  7. from pathlib import Path
  8. from typing import Dict, Any, Optional
  9. import yaml
  10. from .config import parse_multi_account_config, validate_paired_configs
  11. from trendradar.utils.time import DEFAULT_TIMEZONE
  12. def _get_env_bool(key: str) -> Optional[bool]:
  13. """从环境变量获取布尔值,如果未设置返回 None"""
  14. value = os.environ.get(key, "").strip().lower()
  15. if not value:
  16. return None
  17. return value in ("true", "1")
  18. def _get_env_int(key: str, default: int = 0) -> int:
  19. """从环境变量获取整数值"""
  20. value = os.environ.get(key, "").strip()
  21. if not value:
  22. return default
  23. try:
  24. return int(value)
  25. except ValueError:
  26. return default
  27. def _get_env_int_or_none(key: str) -> Optional[int]:
  28. """从环境变量获取整数值,未设置时返回 None"""
  29. value = os.environ.get(key, "").strip()
  30. if not value:
  31. return None
  32. try:
  33. return int(value)
  34. except ValueError:
  35. return None
  36. def _get_env_str(key: str, default: str = "") -> str:
  37. """从环境变量获取字符串值"""
  38. return os.environ.get(key, "").strip() or default
  39. def _load_app_config(config_data: Dict) -> Dict:
  40. """加载应用配置"""
  41. app_config = config_data.get("app", {})
  42. advanced = config_data.get("advanced", {})
  43. return {
  44. "VERSION_CHECK_URL": advanced.get("version_check_url", ""),
  45. "CONFIGS_VERSION_CHECK_URL": advanced.get("configs_version_check_url", ""),
  46. "SHOW_VERSION_UPDATE": app_config.get("show_version_update", True),
  47. "TIMEZONE": _get_env_str("TIMEZONE") or app_config.get("timezone", DEFAULT_TIMEZONE),
  48. "DEBUG": _get_env_bool("DEBUG") if _get_env_bool("DEBUG") is not None else advanced.get("debug", False),
  49. }
  50. def _load_crawler_config(config_data: Dict) -> Dict:
  51. """加载爬虫配置"""
  52. advanced = config_data.get("advanced", {})
  53. crawler_config = advanced.get("crawler", {})
  54. platforms_config = config_data.get("platforms", {})
  55. return {
  56. "REQUEST_INTERVAL": crawler_config.get("request_interval", 100),
  57. "USE_PROXY": crawler_config.get("use_proxy", False),
  58. "DEFAULT_PROXY": crawler_config.get("default_proxy", ""),
  59. "ENABLE_CRAWLER": platforms_config.get("enabled", True),
  60. }
  61. def _load_report_config(config_data: Dict) -> Dict:
  62. """加载报告配置"""
  63. report_config = config_data.get("report", {})
  64. # 环境变量覆盖
  65. sort_by_position_env = _get_env_bool("SORT_BY_POSITION_FIRST")
  66. max_news_env = _get_env_int("MAX_NEWS_PER_KEYWORD")
  67. return {
  68. "REPORT_MODE": report_config.get("mode", "daily"),
  69. "DISPLAY_MODE": report_config.get("display_mode", "keyword"),
  70. "RANK_THRESHOLD": report_config.get("rank_threshold", 10),
  71. "SORT_BY_POSITION_FIRST": sort_by_position_env if sort_by_position_env is not None else report_config.get("sort_by_position_first", False),
  72. "MAX_NEWS_PER_KEYWORD": max_news_env or report_config.get("max_news_per_keyword", 0),
  73. }
  74. def _load_notification_config(config_data: Dict) -> Dict:
  75. """加载通知配置"""
  76. notification = config_data.get("notification", {})
  77. advanced = config_data.get("advanced", {})
  78. batch_size = advanced.get("batch_size", {})
  79. return {
  80. "ENABLE_NOTIFICATION": notification.get("enabled", True),
  81. "MESSAGE_BATCH_SIZE": batch_size.get("default", 4000),
  82. "DINGTALK_BATCH_SIZE": batch_size.get("dingtalk", 20000),
  83. "FEISHU_BATCH_SIZE": batch_size.get("feishu", 29000),
  84. "BARK_BATCH_SIZE": batch_size.get("bark", 3600),
  85. "SLACK_BATCH_SIZE": batch_size.get("slack", 4000),
  86. "BATCH_SEND_INTERVAL": advanced.get("batch_send_interval", 1.0),
  87. "FEISHU_MESSAGE_SEPARATOR": advanced.get("feishu_message_separator", "---"),
  88. "MAX_ACCOUNTS_PER_CHANNEL": _get_env_int("MAX_ACCOUNTS_PER_CHANNEL") or advanced.get("max_accounts_per_channel", 3),
  89. }
  90. def _load_schedule_config(config_data: Dict) -> Dict:
  91. """
  92. 加载统一调度配置
  93. 从 config.yaml 的 schedule 段读取,支持环境变量覆盖。
  94. """
  95. schedule = config_data.get("schedule", {})
  96. # 环境变量覆盖
  97. enabled_env = _get_env_bool("SCHEDULE_ENABLED")
  98. preset_env = _get_env_str("SCHEDULE_PRESET")
  99. enabled = enabled_env if enabled_env is not None else schedule.get("enabled", False)
  100. preset = preset_env or schedule.get("preset", "always_on")
  101. return {
  102. "enabled": enabled,
  103. "preset": preset,
  104. }
  105. def _load_timeline_data(config_dir: str = "config") -> Dict:
  106. """
  107. 加载 timeline.yaml
  108. Args:
  109. config_dir: 配置目录路径
  110. Returns:
  111. timeline.yaml 的完整数据,找不到时返回空模板
  112. """
  113. timeline_path = Path(config_dir) / "timeline.yaml"
  114. if not timeline_path.exists():
  115. print(f"[调度] timeline.yaml 未找到: {timeline_path},使用空模板")
  116. return {
  117. "presets": {},
  118. "custom": {
  119. "default": {
  120. "collect": True,
  121. "analyze": False,
  122. "push": False,
  123. "report_mode": "current",
  124. "ai_mode": "follow_report",
  125. "once": {"analyze": False, "push": False},
  126. },
  127. "periods": {},
  128. "day_plans": {"all_day": {"periods": []}},
  129. "week_map": {i: "all_day" for i in range(1, 8)},
  130. },
  131. }
  132. with open(timeline_path, "r", encoding="utf-8") as f:
  133. data = yaml.safe_load(f)
  134. print(f"[调度] timeline.yaml 加载成功: {timeline_path}")
  135. return data or {}
  136. def _load_weight_config(config_data: Dict) -> Dict:
  137. """加载权重配置"""
  138. advanced = config_data.get("advanced", {})
  139. weight = advanced.get("weight", {})
  140. return {
  141. "RANK_WEIGHT": weight.get("rank", 0.6),
  142. "FREQUENCY_WEIGHT": weight.get("frequency", 0.3),
  143. "HOTNESS_WEIGHT": weight.get("hotness", 0.1),
  144. }
  145. def _load_rss_config(config_data: Dict) -> Dict:
  146. """加载 RSS 配置"""
  147. rss = config_data.get("rss", {})
  148. advanced = config_data.get("advanced", {})
  149. advanced_rss = advanced.get("rss", {})
  150. advanced_crawler = advanced.get("crawler", {})
  151. # RSS 代理配置:优先使用 RSS 专属代理,否则复用 crawler 的 default_proxy
  152. rss_proxy_url = advanced_rss.get("proxy_url", "") or advanced_crawler.get("default_proxy", "")
  153. # 新鲜度过滤配置
  154. freshness_filter = rss.get("freshness_filter", {})
  155. # 验证并设置 max_age_days 默认值
  156. raw_max_age = freshness_filter.get("max_age_days", 3)
  157. try:
  158. max_age_days = int(raw_max_age)
  159. if max_age_days < 0:
  160. print(f"[警告] RSS freshness_filter.max_age_days 为负数 ({max_age_days}),使用默认值 3")
  161. max_age_days = 3
  162. except (ValueError, TypeError):
  163. print(f"[警告] RSS freshness_filter.max_age_days 格式错误 ({raw_max_age}),使用默认值 3")
  164. max_age_days = 3
  165. # RSS 配置直接从 config.yaml 读取,不再支持环境变量
  166. return {
  167. "ENABLED": rss.get("enabled", False),
  168. "REQUEST_INTERVAL": advanced_rss.get("request_interval", 2000),
  169. "TIMEOUT": advanced_rss.get("timeout", 15),
  170. "USE_PROXY": advanced_rss.get("use_proxy", False),
  171. "PROXY_URL": rss_proxy_url,
  172. "FEEDS": rss.get("feeds", []),
  173. "FRESHNESS_FILTER": {
  174. "ENABLED": freshness_filter.get("enabled", True), # 默认启用
  175. "MAX_AGE_DAYS": max_age_days,
  176. },
  177. }
  178. def _load_display_config(config_data: Dict) -> Dict:
  179. """加载推送内容显示配置"""
  180. display = config_data.get("display", {})
  181. regions = display.get("regions", {})
  182. standalone = display.get("standalone", {})
  183. # 默认区域顺序
  184. default_region_order = ["hotlist", "rss", "new_items", "standalone", "ai_analysis"]
  185. region_order = display.get("region_order", default_region_order)
  186. # 验证 region_order 中的值是否合法
  187. valid_regions = {"hotlist", "rss", "new_items", "standalone", "ai_analysis"}
  188. region_order = [r for r in region_order if r in valid_regions]
  189. # 如果过滤后为空,使用默认顺序
  190. if not region_order:
  191. region_order = default_region_order
  192. return {
  193. # 区域显示顺序
  194. "REGION_ORDER": region_order,
  195. # 区域开关
  196. "REGIONS": {
  197. "HOTLIST": regions.get("hotlist", True),
  198. "NEW_ITEMS": regions.get("new_items", True),
  199. "RSS": regions.get("rss", True),
  200. "STANDALONE": regions.get("standalone", False),
  201. "AI_ANALYSIS": regions.get("ai_analysis", True),
  202. },
  203. # 独立展示区配置
  204. "STANDALONE": {
  205. "PLATFORMS": standalone.get("platforms", []),
  206. "RSS_FEEDS": standalone.get("rss_feeds", []),
  207. "MAX_ITEMS": standalone.get("max_items", 20),
  208. },
  209. }
  210. def _load_ai_config(config_data: Dict) -> Dict:
  211. """加载 AI 模型配置(LiteLLM 格式)"""
  212. ai_config = config_data.get("ai", {})
  213. timeout_env = _get_env_int_or_none("AI_TIMEOUT")
  214. return {
  215. # LiteLLM 核心配置
  216. "MODEL": _get_env_str("AI_MODEL") or ai_config.get("model", ""),
  217. "API_KEY": _get_env_str("AI_API_KEY") or ai_config.get("api_key", ""),
  218. "API_BASE": _get_env_str("AI_API_BASE") or ai_config.get("api_base", ""),
  219. # 生成参数
  220. "TIMEOUT": timeout_env if timeout_env is not None else ai_config.get("timeout", 120),
  221. "TEMPERATURE": ai_config.get("temperature", 1.0),
  222. "MAX_TOKENS": ai_config.get("max_tokens", 5000),
  223. # LiteLLM 高级选项
  224. "NUM_RETRIES": ai_config.get("num_retries", 2),
  225. "FALLBACK_MODELS": ai_config.get("fallback_models", []),
  226. "EXTRA_PARAMS": ai_config.get("extra_params", {}),
  227. }
  228. def _load_ai_analysis_config(config_data: Dict) -> Dict:
  229. """加载 AI 分析配置(功能配置,模型配置见 _load_ai_config)"""
  230. ai_config = config_data.get("ai_analysis", {})
  231. enabled_env = _get_env_bool("AI_ANALYSIS_ENABLED")
  232. return {
  233. "ENABLED": enabled_env if enabled_env is not None else ai_config.get("enabled", False),
  234. "LANGUAGE": ai_config.get("language", "Chinese"),
  235. "PROMPT_FILE": ai_config.get("prompt_file", "ai_analysis_prompt.txt"),
  236. "MODE": ai_config.get("mode", "follow_report"),
  237. "MAX_NEWS_FOR_ANALYSIS": ai_config.get("max_news_for_analysis", 50),
  238. "INCLUDE_RSS": ai_config.get("include_rss", True),
  239. "INCLUDE_RANK_TIMELINE": ai_config.get("include_rank_timeline", False),
  240. "INCLUDE_STANDALONE": ai_config.get("include_standalone", False),
  241. }
  242. def _load_ai_translation_config(config_data: Dict) -> Dict:
  243. """加载 AI 翻译配置(功能配置,模型配置见 _load_ai_config)"""
  244. trans_config = config_data.get("ai_translation", {})
  245. enabled_env = _get_env_bool("AI_TRANSLATION_ENABLED")
  246. scope = trans_config.get("scope", {})
  247. return {
  248. "ENABLED": enabled_env if enabled_env is not None else trans_config.get("enabled", False),
  249. "LANGUAGE": _get_env_str("AI_TRANSLATION_LANGUAGE") or trans_config.get("language", "English"),
  250. "PROMPT_FILE": trans_config.get("prompt_file", "ai_translation_prompt.txt"),
  251. "SCOPE": {
  252. "HOTLIST": scope.get("hotlist", True),
  253. "RSS": scope.get("rss", True),
  254. "STANDALONE": scope.get("standalone", True),
  255. },
  256. }
  257. def _load_ai_filter_config(config_data: Dict) -> Dict:
  258. """加载 AI 智能筛选配置(由 filter.method 控制是否启用)"""
  259. ai_filter = config_data.get("ai_filter", {})
  260. return {
  261. "BATCH_SIZE": ai_filter.get("batch_size", 200),
  262. "BATCH_INTERVAL": ai_filter.get("batch_interval", 5),
  263. "INTERESTS_FILE": ai_filter.get("interests_file"), # None = 使用默认 config/ai_interests.txt
  264. "PROMPT_FILE": ai_filter.get("prompt_file", "prompt.txt"),
  265. "EXTRACT_PROMPT_FILE": ai_filter.get("extract_prompt_file", "extract_prompt.txt"),
  266. "UPDATE_TAGS_PROMPT_FILE": ai_filter.get("update_tags_prompt_file", "update_tags_prompt.txt"),
  267. "RECLASSIFY_THRESHOLD": ai_filter.get("reclassify_threshold", 0.6),
  268. "MIN_SCORE": float(ai_filter.get("min_score", 0)),
  269. }
  270. def _load_filter_config(config_data: Dict) -> Dict:
  271. """加载筛选策略配置"""
  272. filter_cfg = config_data.get("filter", {})
  273. # 环境变量兼容:AI_FILTER_ENABLED=true → method=ai
  274. env_ai_filter = _get_env_bool("AI_FILTER_ENABLED")
  275. method = filter_cfg.get("method", "keyword")
  276. if env_ai_filter is True:
  277. method = "ai"
  278. # 兼容旧配置:如果 ai_filter.enabled=true 且未显式设置 filter.method
  279. if method == "keyword" and not filter_cfg.get("method"):
  280. ai_filter = config_data.get("ai_filter", {})
  281. if ai_filter.get("enabled", False):
  282. method = "ai"
  283. return {
  284. "METHOD": method, # "keyword" | "ai"
  285. "PRIORITY_SORT_ENABLED": filter_cfg.get("priority_sort_enabled", False), # AI 模式标签优先级排序开关
  286. }
  287. def _load_storage_config(config_data: Dict) -> Dict:
  288. """加载存储配置"""
  289. storage = config_data.get("storage", {})
  290. formats = storage.get("formats", {})
  291. local = storage.get("local", {})
  292. remote = storage.get("remote", {})
  293. pull = storage.get("pull", {})
  294. txt_enabled_env = _get_env_bool("STORAGE_TXT_ENABLED")
  295. html_enabled_env = _get_env_bool("STORAGE_HTML_ENABLED")
  296. pull_enabled_env = _get_env_bool("PULL_ENABLED")
  297. return {
  298. "BACKEND": _get_env_str("STORAGE_BACKEND") or storage.get("backend", "auto"),
  299. "FORMATS": {
  300. "SQLITE": formats.get("sqlite", True),
  301. "TXT": txt_enabled_env if txt_enabled_env is not None else formats.get("txt", True),
  302. "HTML": html_enabled_env if html_enabled_env is not None else formats.get("html", True),
  303. },
  304. "LOCAL": {
  305. "DATA_DIR": local.get("data_dir", "output"),
  306. "RETENTION_DAYS": _get_env_int("LOCAL_RETENTION_DAYS") or local.get("retention_days", 0),
  307. },
  308. "REMOTE": {
  309. "ENDPOINT_URL": _get_env_str("S3_ENDPOINT_URL") or remote.get("endpoint_url", ""),
  310. "BUCKET_NAME": _get_env_str("S3_BUCKET_NAME") or remote.get("bucket_name", ""),
  311. "ACCESS_KEY_ID": _get_env_str("S3_ACCESS_KEY_ID") or remote.get("access_key_id", ""),
  312. "SECRET_ACCESS_KEY": _get_env_str("S3_SECRET_ACCESS_KEY") or remote.get("secret_access_key", ""),
  313. "REGION": _get_env_str("S3_REGION") or remote.get("region", ""),
  314. "RETENTION_DAYS": _get_env_int("REMOTE_RETENTION_DAYS") or remote.get("retention_days", 0),
  315. },
  316. "PULL": {
  317. "ENABLED": pull_enabled_env if pull_enabled_env is not None else pull.get("enabled", False),
  318. "DAYS": _get_env_int("PULL_DAYS") or pull.get("days", 7),
  319. },
  320. }
  321. def _load_webhook_config(config_data: Dict) -> Dict:
  322. """加载 Webhook 配置"""
  323. notification = config_data.get("notification", {})
  324. channels = notification.get("channels", {})
  325. # 各渠道配置
  326. feishu = channels.get("feishu", {})
  327. dingtalk = channels.get("dingtalk", {})
  328. wework = channels.get("wework", {})
  329. telegram = channels.get("telegram", {})
  330. email = channels.get("email", {})
  331. ntfy = channels.get("ntfy", {})
  332. bark = channels.get("bark", {})
  333. slack = channels.get("slack", {})
  334. generic = channels.get("generic_webhook", {})
  335. return {
  336. # 飞书
  337. "FEISHU_WEBHOOK_URL": _get_env_str("FEISHU_WEBHOOK_URL") or feishu.get("webhook_url", ""),
  338. # 钉钉
  339. "DINGTALK_WEBHOOK_URL": _get_env_str("DINGTALK_WEBHOOK_URL") or dingtalk.get("webhook_url", ""),
  340. # 企业微信
  341. "WEWORK_WEBHOOK_URL": _get_env_str("WEWORK_WEBHOOK_URL") or wework.get("webhook_url", ""),
  342. "WEWORK_MSG_TYPE": _get_env_str("WEWORK_MSG_TYPE") or wework.get("msg_type", "markdown"),
  343. # Telegram
  344. "TELEGRAM_BOT_TOKEN": _get_env_str("TELEGRAM_BOT_TOKEN") or telegram.get("bot_token", ""),
  345. "TELEGRAM_CHAT_ID": _get_env_str("TELEGRAM_CHAT_ID") or telegram.get("chat_id", ""),
  346. # 邮件
  347. "EMAIL_FROM": _get_env_str("EMAIL_FROM") or email.get("from", ""),
  348. "EMAIL_PASSWORD": _get_env_str("EMAIL_PASSWORD") or email.get("password", ""),
  349. "EMAIL_TO": _get_env_str("EMAIL_TO") or email.get("to", ""),
  350. "EMAIL_SMTP_SERVER": _get_env_str("EMAIL_SMTP_SERVER") or email.get("smtp_server", ""),
  351. "EMAIL_SMTP_PORT": _get_env_str("EMAIL_SMTP_PORT") or email.get("smtp_port", ""),
  352. # ntfy
  353. "NTFY_SERVER_URL": _get_env_str("NTFY_SERVER_URL") or ntfy.get("server_url") or "https://ntfy.sh",
  354. "NTFY_TOPIC": _get_env_str("NTFY_TOPIC") or ntfy.get("topic", ""),
  355. "NTFY_TOKEN": _get_env_str("NTFY_TOKEN") or ntfy.get("token", ""),
  356. # Bark
  357. "BARK_URL": _get_env_str("BARK_URL") or bark.get("url", ""),
  358. # Slack
  359. "SLACK_WEBHOOK_URL": _get_env_str("SLACK_WEBHOOK_URL") or slack.get("webhook_url", ""),
  360. # 通用 Webhook
  361. "GENERIC_WEBHOOK_URL": _get_env_str("GENERIC_WEBHOOK_URL") or generic.get("webhook_url", ""),
  362. "GENERIC_WEBHOOK_TEMPLATE": _get_env_str("GENERIC_WEBHOOK_TEMPLATE") or generic.get("payload_template", ""),
  363. }
  364. def _print_notification_sources(config: Dict) -> None:
  365. """打印通知渠道配置来源信息"""
  366. notification_sources = []
  367. max_accounts = config["MAX_ACCOUNTS_PER_CHANNEL"]
  368. if config["FEISHU_WEBHOOK_URL"]:
  369. accounts = parse_multi_account_config(config["FEISHU_WEBHOOK_URL"])
  370. count = min(len(accounts), max_accounts)
  371. source = "环境变量" if os.environ.get("FEISHU_WEBHOOK_URL") else "配置文件"
  372. notification_sources.append(f"飞书({source}, {count}个账号)")
  373. if config["DINGTALK_WEBHOOK_URL"]:
  374. accounts = parse_multi_account_config(config["DINGTALK_WEBHOOK_URL"])
  375. count = min(len(accounts), max_accounts)
  376. source = "环境变量" if os.environ.get("DINGTALK_WEBHOOK_URL") else "配置文件"
  377. notification_sources.append(f"钉钉({source}, {count}个账号)")
  378. if config["WEWORK_WEBHOOK_URL"]:
  379. accounts = parse_multi_account_config(config["WEWORK_WEBHOOK_URL"])
  380. count = min(len(accounts), max_accounts)
  381. source = "环境变量" if os.environ.get("WEWORK_WEBHOOK_URL") else "配置文件"
  382. notification_sources.append(f"企业微信({source}, {count}个账号)")
  383. if config["TELEGRAM_BOT_TOKEN"] and config["TELEGRAM_CHAT_ID"]:
  384. tokens = parse_multi_account_config(config["TELEGRAM_BOT_TOKEN"])
  385. chat_ids = parse_multi_account_config(config["TELEGRAM_CHAT_ID"])
  386. valid, count = validate_paired_configs(
  387. {"bot_token": tokens, "chat_id": chat_ids},
  388. "Telegram",
  389. required_keys=["bot_token", "chat_id"]
  390. )
  391. if valid and count > 0:
  392. count = min(count, max_accounts)
  393. token_source = "环境变量" if os.environ.get("TELEGRAM_BOT_TOKEN") else "配置文件"
  394. notification_sources.append(f"Telegram({token_source}, {count}个账号)")
  395. if config["EMAIL_FROM"] and config["EMAIL_PASSWORD"] and config["EMAIL_TO"]:
  396. from_source = "环境变量" if os.environ.get("EMAIL_FROM") else "配置文件"
  397. notification_sources.append(f"邮件({from_source})")
  398. if config["NTFY_SERVER_URL"] and config["NTFY_TOPIC"]:
  399. topics = parse_multi_account_config(config["NTFY_TOPIC"])
  400. tokens = parse_multi_account_config(config["NTFY_TOKEN"])
  401. if tokens:
  402. valid, count = validate_paired_configs(
  403. {"topic": topics, "token": tokens},
  404. "ntfy"
  405. )
  406. if valid and count > 0:
  407. count = min(count, max_accounts)
  408. server_source = "环境变量" if os.environ.get("NTFY_SERVER_URL") else "配置文件"
  409. notification_sources.append(f"ntfy({server_source}, {count}个账号)")
  410. else:
  411. count = min(len(topics), max_accounts)
  412. server_source = "环境变量" if os.environ.get("NTFY_SERVER_URL") else "配置文件"
  413. notification_sources.append(f"ntfy({server_source}, {count}个账号)")
  414. if config["BARK_URL"]:
  415. accounts = parse_multi_account_config(config["BARK_URL"])
  416. count = min(len(accounts), max_accounts)
  417. bark_source = "环境变量" if os.environ.get("BARK_URL") else "配置文件"
  418. notification_sources.append(f"Bark({bark_source}, {count}个账号)")
  419. if config["SLACK_WEBHOOK_URL"]:
  420. accounts = parse_multi_account_config(config["SLACK_WEBHOOK_URL"])
  421. count = min(len(accounts), max_accounts)
  422. slack_source = "环境变量" if os.environ.get("SLACK_WEBHOOK_URL") else "配置文件"
  423. notification_sources.append(f"Slack({slack_source}, {count}个账号)")
  424. if config.get("GENERIC_WEBHOOK_URL"):
  425. accounts = parse_multi_account_config(config["GENERIC_WEBHOOK_URL"])
  426. count = min(len(accounts), max_accounts)
  427. source = "环境变量" if os.environ.get("GENERIC_WEBHOOK_URL") else "配置文件"
  428. notification_sources.append(f"通用Webhook({source}, {count}个账号)")
  429. if notification_sources:
  430. print(f"通知渠道配置来源: {', '.join(notification_sources)}")
  431. print(f"每个渠道最大账号数: {max_accounts}")
  432. else:
  433. print("未配置任何通知渠道")
  434. def load_config(config_path: Optional[str] = None) -> Dict[str, Any]:
  435. """
  436. 加载配置文件
  437. Args:
  438. config_path: 配置文件路径,默认从环境变量 CONFIG_PATH 获取或使用 config/config.yaml
  439. Returns:
  440. 包含所有配置的字典
  441. Raises:
  442. FileNotFoundError: 配置文件不存在
  443. """
  444. if config_path is None:
  445. config_path = os.environ.get("CONFIG_PATH", "config/config.yaml")
  446. if not Path(config_path).exists():
  447. raise FileNotFoundError(f"配置文件 {config_path} 不存在")
  448. with open(config_path, "r", encoding="utf-8") as f:
  449. config_data = yaml.safe_load(f)
  450. print(f"配置文件加载成功: {config_path}")
  451. # 合并所有配置
  452. config = {}
  453. # 应用配置
  454. config.update(_load_app_config(config_data))
  455. # 爬虫配置
  456. config.update(_load_crawler_config(config_data))
  457. # 报告配置
  458. config.update(_load_report_config(config_data))
  459. # 通知配置
  460. config.update(_load_notification_config(config_data))
  461. # 统一调度配置
  462. config["SCHEDULE"] = _load_schedule_config(config_data)
  463. config["_TIMELINE_DATA"] = _load_timeline_data(
  464. str(Path(config_path).parent) if config_path else "config"
  465. )
  466. # 权重配置
  467. config["WEIGHT_CONFIG"] = _load_weight_config(config_data)
  468. # 平台配置
  469. platforms_config = config_data.get("platforms", {})
  470. config["PLATFORMS"] = [p for p in platforms_config.get("sources", []) if p.get("enabled", True)]
  471. # RSS 配置
  472. config["RSS"] = _load_rss_config(config_data)
  473. # AI 模型共享配置
  474. config["AI"] = _load_ai_config(config_data)
  475. # AI 分析配置
  476. config["AI_ANALYSIS"] = _load_ai_analysis_config(config_data)
  477. # AI 翻译配置
  478. config["AI_TRANSLATION"] = _load_ai_translation_config(config_data)
  479. # AI 智能筛选配置
  480. config["AI_FILTER"] = _load_ai_filter_config(config_data)
  481. # 筛选策略配置
  482. config["FILTER"] = _load_filter_config(config_data)
  483. # 推送内容显示配置
  484. config["DISPLAY"] = _load_display_config(config_data)
  485. # 存储配置
  486. config["STORAGE"] = _load_storage_config(config_data)
  487. # Webhook 配置
  488. config.update(_load_webhook_config(config_data))
  489. # 打印通知渠道配置来源
  490. _print_notification_sources(config)
  491. return config