__main__.py 95 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293
  1. # coding=utf-8
  2. """
  3. TrendRadar 主程序
  4. 热点新闻聚合与分析工具
  5. 支持: python -m trendradar
  6. """
  7. import argparse
  8. import copy
  9. import json
  10. import os
  11. import re
  12. import sys
  13. import webbrowser
  14. from datetime import datetime, timezone
  15. from pathlib import Path
  16. from typing import Dict, List, Tuple, Optional
  17. import requests
  18. from trendradar.context import AppContext
  19. from trendradar import __version__
  20. from trendradar.core import load_config, parse_multi_account_config, validate_paired_configs
  21. from trendradar.core.analyzer import convert_keyword_stats_to_platform_stats
  22. from trendradar.crawler import DataFetcher
  23. from trendradar.storage import convert_crawl_results_to_news_data
  24. from trendradar.utils.time import DEFAULT_TIMEZONE, is_within_days, calculate_days_old
  25. from trendradar.ai import AIAnalyzer, AIAnalysisResult
  26. from trendradar.core.scheduler import ResolvedSchedule
  27. def _parse_version(version_str: str) -> Tuple[int, int, int]:
  28. """解析版本号字符串为元组"""
  29. try:
  30. parts = version_str.strip().split(".")
  31. if len(parts) >= 3:
  32. return int(parts[0]), int(parts[1]), int(parts[2])
  33. return 0, 0, 0
  34. except (ValueError, AttributeError, TypeError):
  35. return 0, 0, 0
  36. def _compare_version(local: str, remote: str) -> str:
  37. """比较版本号,返回状态文字"""
  38. local_tuple = _parse_version(local)
  39. remote_tuple = _parse_version(remote)
  40. if local_tuple < remote_tuple:
  41. return "⚠️ 需要更新"
  42. elif local_tuple > remote_tuple:
  43. return "🔮 超前版本"
  44. else:
  45. return "✅ 已是最新"
  46. def _fetch_remote_version(version_url: str, proxy_url: Optional[str] = None) -> Optional[str]:
  47. """获取远程版本号"""
  48. try:
  49. proxies = None
  50. if proxy_url:
  51. proxies = {"http": proxy_url, "https": proxy_url}
  52. headers = {
  53. "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
  54. "Accept": "text/plain, */*",
  55. "Cache-Control": "no-cache",
  56. }
  57. response = requests.get(version_url, proxies=proxies, headers=headers, timeout=10)
  58. response.raise_for_status()
  59. return response.text.strip()
  60. except Exception as e:
  61. print(f"[版本检查] 获取远程版本失败: {e}")
  62. return None
  63. def _parse_config_versions(content: str) -> Dict[str, str]:
  64. """解析配置文件版本内容为字典"""
  65. versions = {}
  66. try:
  67. if not content:
  68. return versions
  69. for line in content.splitlines():
  70. line = line.strip()
  71. if not line or "=" not in line:
  72. continue
  73. name, version = line.split("=", 1)
  74. versions[name.strip()] = version.strip()
  75. except Exception as e:
  76. print(f"[版本检查] 解析配置版本失败: {e}")
  77. return versions
  78. def check_all_versions(
  79. version_url: str,
  80. configs_version_url: Optional[str] = None,
  81. proxy_url: Optional[str] = None
  82. ) -> Tuple[bool, Optional[str]]:
  83. """
  84. 统一版本检查:程序版本 + 配置文件版本
  85. Args:
  86. version_url: 远程程序版本检查 URL
  87. configs_version_url: 远程配置文件版本检查 URL (返回格式: filename=version)
  88. proxy_url: 代理 URL
  89. Returns:
  90. (need_update, remote_version): 程序是否需要更新及远程版本号
  91. """
  92. # 获取远程版本
  93. remote_version = _fetch_remote_version(version_url, proxy_url)
  94. # 获取远程配置版本(如果有提供 URL)
  95. remote_config_versions = {}
  96. if configs_version_url:
  97. content = _fetch_remote_version(configs_version_url, proxy_url)
  98. if content:
  99. remote_config_versions = _parse_config_versions(content)
  100. print("=" * 60)
  101. print("版本检查")
  102. print("=" * 60)
  103. if remote_version:
  104. print(f"远程程序版本: {remote_version}")
  105. else:
  106. print("远程程序版本: 获取失败")
  107. if configs_version_url:
  108. if remote_config_versions:
  109. print(f"远程配置清单: 获取成功 ({len(remote_config_versions)} 个文件)")
  110. else:
  111. print("远程配置清单: 获取失败或为空")
  112. print("-" * 60)
  113. program_status = _compare_version(__version__, remote_version) if remote_version else "(无法比较)"
  114. print(f" 主程序版本: {__version__} {program_status}")
  115. config_files = [
  116. Path("config/config.yaml"),
  117. Path("config/timeline.yaml"),
  118. Path("config/frequency_words.txt"),
  119. Path("config/ai_interests.txt"),
  120. Path("config/ai_analysis_prompt.txt"),
  121. Path("config/ai_translation_prompt.txt"),
  122. ]
  123. version_pattern = re.compile(r"Version:\s*(\d+\.\d+\.\d+)", re.IGNORECASE)
  124. for config_file in config_files:
  125. if not config_file.exists():
  126. print(f" {config_file.name}: 文件不存在")
  127. continue
  128. try:
  129. with open(config_file, "r", encoding="utf-8") as f:
  130. local_version = None
  131. for i, line in enumerate(f):
  132. if i >= 20:
  133. break
  134. match = version_pattern.search(line)
  135. if match:
  136. local_version = match.group(1)
  137. break
  138. # 获取该文件的远程版本
  139. target_remote_version = remote_config_versions.get(config_file.name)
  140. if local_version:
  141. if target_remote_version:
  142. status = _compare_version(local_version, target_remote_version)
  143. print(f" {config_file.name}: {local_version} {status}")
  144. else:
  145. print(f" {config_file.name}: {local_version} (未找到远程版本)")
  146. else:
  147. print(f" {config_file.name}: 未找到本地版本号")
  148. except Exception as e:
  149. print(f" {config_file.name}: 读取失败 - {e}")
  150. print("=" * 60)
  151. # 返回程序版本的更新状态
  152. if remote_version:
  153. need_update = _parse_version(__version__) < _parse_version(remote_version)
  154. return need_update, remote_version if need_update else None
  155. return False, None
  156. # === 主分析器 ===
class NewsAnalyzer:
    """News analyzer."""

    # Strategy definitions, one entry per report mode.
    MODE_STRATEGIES = {
        "incremental": {
            "mode_name": "增量模式",
            "description": "增量模式(只关注新增新闻,无新增时不推送)",
            "report_type": "增量分析",
            "should_send_notification": True,
        },
        "current": {
            "mode_name": "当前榜单模式",
            "description": "当前榜单模式(当前榜单匹配新闻 + 新增新闻区域 + 按时推送)",
            "report_type": "当前榜单",
            "should_send_notification": True,
        },
        "daily": {
            "mode_name": "全天汇总模式",
            "description": "全天汇总模式(所有匹配新闻 + 新增新闻区域 + 按时推送)",
            "report_type": "全天汇总",
            "should_send_notification": True,
        },
    }
    def __init__(self, config: Optional[Dict] = None):
        """Set up the analyzer: config, app context, proxy, fetcher and storage.

        Args:
            config: Configuration dictionary. When None, ``load_config()`` is
                called to obtain one.
        """
        # Use the supplied config, or load a fresh one
        if config is None:
            print("正在加载配置...")
            config = load_config()
        print(f"TrendRadar v{__version__} 配置加载完成")
        print(f"监控平台数量: {len(config['PLATFORMS'])}")
        print(f"时区: {config.get('TIMEZONE', DEFAULT_TIMEZONE)}")
        # Create the application context
        self.ctx = AppContext(config)
        self.request_interval = self.ctx.config["REQUEST_INTERVAL"]
        self.report_mode = self.ctx.config["REPORT_MODE"]
        self.frequency_file = None
        self.filter_method = None  # None = use the global setting ctx.filter_method
        self.interests_file = None  # None = use the global setting ai_filter.interests_file
        self.rank_threshold = self.ctx.rank_threshold
        self.is_github_actions = os.environ.get("GITHUB_ACTIONS") == "true"
        self.is_docker_container = self._detect_docker_environment()
        self.update_info = None
        # proxy_url must exist before _setup_proxy(), which may overwrite it
        self.proxy_url = None
        self._setup_proxy()
        self.data_fetcher = DataFetcher(self.proxy_url)
        # Initialise the storage manager (via AppContext)
        self._init_storage_manager()
        # NOTE: update_info is set by main(), to avoid requesting the remote version twice
  205. def _init_storage_manager(self) -> None:
  206. """初始化存储管理器(使用 AppContext)"""
  207. # 获取数据保留天数(支持环境变量覆盖)
  208. env_retention = os.environ.get("STORAGE_RETENTION_DAYS", "").strip()
  209. if env_retention:
  210. # 环境变量覆盖配置
  211. self.ctx.config["STORAGE"]["RETENTION_DAYS"] = int(env_retention)
  212. self.storage_manager = self.ctx.get_storage_manager()
  213. print(f"存储后端: {self.storage_manager.backend_name}")
  214. retention_days = self.ctx.config.get("STORAGE", {}).get("RETENTION_DAYS", 0)
  215. if retention_days > 0:
  216. print(f"数据保留天数: {retention_days} 天")
  217. def _detect_docker_environment(self) -> bool:
  218. """检测是否运行在 Docker 容器中"""
  219. try:
  220. if os.environ.get("DOCKER_CONTAINER") == "true":
  221. return True
  222. if os.path.exists("/.dockerenv"):
  223. return True
  224. return False
  225. except Exception:
  226. return False
  227. def _should_open_browser(self) -> bool:
  228. """判断是否应该打开浏览器"""
  229. return not self.is_github_actions and not self.is_docker_container
  230. def _setup_proxy(self) -> None:
  231. """设置代理配置"""
  232. if not self.is_github_actions and self.ctx.config["USE_PROXY"]:
  233. self.proxy_url = self.ctx.config["DEFAULT_PROXY"]
  234. print("本地环境,使用代理")
  235. elif not self.is_github_actions and not self.ctx.config["USE_PROXY"]:
  236. print("本地环境,未启用代理")
  237. else:
  238. print("GitHub Actions环境,不使用代理")
  239. def _set_update_info_from_config(self) -> None:
  240. """从已缓存的远程版本设置更新信息(不再重复请求)"""
  241. try:
  242. version_url = self.ctx.config.get("VERSION_CHECK_URL", "")
  243. if not version_url:
  244. return
  245. remote_version = _fetch_remote_version(version_url, self.proxy_url)
  246. if remote_version:
  247. need_update = _parse_version(__version__) < _parse_version(remote_version)
  248. if need_update:
  249. self.update_info = {
  250. "current_version": __version__,
  251. "remote_version": remote_version,
  252. }
  253. except Exception as e:
  254. print(f"版本检查出错: {e}")
  255. def _get_mode_strategy(self) -> Dict:
  256. """获取当前模式的策略配置"""
  257. return self.MODE_STRATEGIES.get(self.report_mode, self.MODE_STRATEGIES["daily"])
  258. def _has_notification_configured(self) -> bool:
  259. """检查是否配置了任何通知渠道"""
  260. cfg = self.ctx.config
  261. return any(
  262. [
  263. cfg["FEISHU_WEBHOOK_URL"],
  264. cfg["DINGTALK_WEBHOOK_URL"],
  265. cfg["WEWORK_WEBHOOK_URL"],
  266. (cfg["TELEGRAM_BOT_TOKEN"] and cfg["TELEGRAM_CHAT_ID"]),
  267. (
  268. cfg["EMAIL_FROM"]
  269. and cfg["EMAIL_PASSWORD"]
  270. and cfg["EMAIL_TO"]
  271. ),
  272. (cfg["NTFY_SERVER_URL"] and cfg["NTFY_TOPIC"]),
  273. cfg["BARK_URL"],
  274. cfg["SLACK_WEBHOOK_URL"],
  275. cfg["GENERIC_WEBHOOK_URL"],
  276. ]
  277. )
  278. def _has_valid_content(
  279. self, stats: List[Dict], new_titles: Optional[Dict] = None
  280. ) -> bool:
  281. """检查是否有有效的新闻内容"""
  282. if self.report_mode == "incremental":
  283. # 增量模式:只要有匹配的新闻就推送
  284. # count_word_frequency 已经确保只处理新增的新闻(包括当天第一次爬取的情况)
  285. has_matched_news = any(stat["count"] > 0 for stat in stats)
  286. return has_matched_news
  287. elif self.report_mode == "current":
  288. # current模式:只要stats有内容就说明有匹配的新闻
  289. return any(stat["count"] > 0 for stat in stats)
  290. else:
  291. # 当日汇总模式下,检查是否有匹配的频率词新闻或新增新闻
  292. has_matched_news = any(stat["count"] > 0 for stat in stats)
  293. has_new_news = bool(
  294. new_titles and any(len(titles) > 0 for titles in new_titles.values())
  295. )
  296. return has_matched_news or has_new_news
  297. def _prepare_ai_analysis_data(
  298. self,
  299. ai_mode: str,
  300. current_results: Optional[Dict] = None,
  301. current_id_to_name: Optional[Dict] = None,
  302. ) -> Tuple[List[Dict], Optional[Dict]]:
  303. """
  304. 为 AI 分析准备指定模式的数据
  305. Args:
  306. ai_mode: AI 分析模式 (daily/current/incremental)
  307. current_results: 当前抓取的结果(用于 incremental 模式)
  308. current_id_to_name: 当前的平台映射(用于 incremental 模式)
  309. Returns:
  310. Tuple[stats, id_to_name]: 统计数据和平台映射
  311. """
  312. try:
  313. word_groups, filter_words, global_filters = self.ctx.load_frequency_words(self.frequency_file)
  314. if ai_mode == "incremental":
  315. # incremental 模式:使用当前抓取的数据
  316. if not current_results or not current_id_to_name:
  317. print("[AI] incremental 模式需要当前抓取数据,但未提供")
  318. return [], None
  319. # 准备当前时间信息
  320. time_info = self.ctx.format_time()
  321. title_info = self._prepare_current_title_info(current_results, time_info)
  322. # 检测新增标题
  323. new_titles = self.ctx.detect_new_titles(list(current_results.keys()))
  324. # 统计计算
  325. stats, _ = self.ctx.count_frequency(
  326. current_results,
  327. word_groups,
  328. filter_words,
  329. current_id_to_name,
  330. title_info,
  331. new_titles,
  332. mode="incremental",
  333. global_filters=global_filters,
  334. quiet=True,
  335. )
  336. # 如果是 platform 模式,转换数据结构
  337. if self.ctx.display_mode == "platform" and stats:
  338. stats = convert_keyword_stats_to_platform_stats(
  339. stats,
  340. self.ctx.weight_config,
  341. self.ctx.rank_threshold,
  342. )
  343. return stats, current_id_to_name
  344. elif ai_mode in ["daily", "current"]:
  345. # 加载历史数据
  346. analysis_data = self._load_analysis_data(quiet=True)
  347. if not analysis_data:
  348. print(f"[AI] 无法加载历史数据用于 {ai_mode} 模式分析")
  349. return [], None
  350. (
  351. all_results,
  352. id_to_name,
  353. title_info,
  354. new_titles,
  355. _,
  356. _,
  357. _,
  358. ) = analysis_data
  359. # 统计计算
  360. stats, _ = self.ctx.count_frequency(
  361. all_results,
  362. word_groups,
  363. filter_words,
  364. id_to_name,
  365. title_info,
  366. new_titles,
  367. mode=ai_mode,
  368. global_filters=global_filters,
  369. quiet=True,
  370. )
  371. # 如果是 platform 模式,转换数据结构
  372. if self.ctx.display_mode == "platform" and stats:
  373. stats = convert_keyword_stats_to_platform_stats(
  374. stats,
  375. self.ctx.weight_config,
  376. self.ctx.rank_threshold,
  377. )
  378. return stats, id_to_name
  379. else:
  380. print(f"[AI] 未知的 AI 模式: {ai_mode}")
  381. return [], None
  382. except Exception as e:
  383. print(f"[AI] 准备 {ai_mode} 模式数据时出错: {e}")
  384. if self.ctx.config.get("DEBUG", False):
  385. import traceback
  386. traceback.print_exc()
  387. return [], None
  388. def _run_ai_analysis(
  389. self,
  390. stats: List[Dict],
  391. rss_items: Optional[List[Dict]],
  392. mode: str,
  393. report_type: str,
  394. id_to_name: Optional[Dict],
  395. current_results: Optional[Dict] = None,
  396. schedule: ResolvedSchedule = None,
  397. standalone_data: Optional[Dict] = None,
  398. ) -> Optional[AIAnalysisResult]:
  399. """执行 AI 分析"""
  400. analysis_config = self.ctx.config.get("AI_ANALYSIS", {})
  401. if not analysis_config.get("ENABLED", False):
  402. return None
  403. # 调度系统决策
  404. if not schedule.analyze:
  405. print("[AI] 调度器: 当前时间段不执行 AI 分析")
  406. return None
  407. if schedule.once_analyze and schedule.period_key:
  408. scheduler = self.ctx.create_scheduler()
  409. date_str = self.ctx.format_date()
  410. if scheduler.already_executed(schedule.period_key, "analyze", date_str):
  411. print(f"[AI] 调度器: 时间段 {schedule.period_name or schedule.period_key} 今天已分析过,跳过")
  412. return None
  413. else:
  414. print(f"[AI] 调度器: 时间段 {schedule.period_name or schedule.period_key} 今天首次分析")
  415. print("[AI] 正在进行 AI 分析...")
  416. try:
  417. ai_config = self.ctx.config.get("AI", {})
  418. debug_mode = self.ctx.config.get("DEBUG", False)
  419. analyzer = AIAnalyzer(ai_config, analysis_config, self.ctx.get_time, debug=debug_mode)
  420. # 确定 AI 分析使用的模式
  421. ai_mode_config = analysis_config.get("MODE", "follow_report")
  422. if ai_mode_config == "follow_report":
  423. # 跟随推送报告模式
  424. ai_mode = mode
  425. ai_stats = stats
  426. ai_id_to_name = id_to_name
  427. elif ai_mode_config in ["daily", "current", "incremental"]:
  428. # 使用独立配置的模式,需要重新准备数据
  429. ai_mode = ai_mode_config
  430. if ai_mode != mode:
  431. print(f"[AI] 使用独立分析模式: {ai_mode} (推送模式: {mode})")
  432. print(f"[AI] 正在准备 {ai_mode} 模式的数据...")
  433. # 根据 AI 模式重新准备数据
  434. ai_stats, ai_id_to_name = self._prepare_ai_analysis_data(
  435. ai_mode, current_results, id_to_name
  436. )
  437. if not ai_stats:
  438. print(f"[AI] 警告: 无法准备 {ai_mode} 模式的数据,回退到推送模式数据")
  439. ai_stats = stats
  440. ai_id_to_name = id_to_name
  441. ai_mode = mode
  442. else:
  443. ai_stats = stats
  444. ai_id_to_name = id_to_name
  445. else:
  446. # 配置错误,回退到跟随模式
  447. print(f"[AI] 警告: 无效的 ai_analysis.mode 配置 '{ai_mode_config}',使用推送模式 '{mode}'")
  448. ai_mode = mode
  449. ai_stats = stats
  450. ai_id_to_name = id_to_name
  451. # 提取平台列表
  452. platforms = list(ai_id_to_name.values()) if ai_id_to_name else []
  453. # 提取关键词列表
  454. keywords = [s.get("word", "") for s in ai_stats if s.get("word")] if ai_stats else []
  455. # 确定报告类型
  456. if ai_mode != mode:
  457. # 根据 AI 模式确定报告类型
  458. ai_report_type = {
  459. "daily": "当日汇总",
  460. "current": "当前榜单",
  461. "incremental": "增量更新"
  462. }.get(ai_mode, report_type)
  463. else:
  464. ai_report_type = report_type
  465. result = analyzer.analyze(
  466. stats=ai_stats,
  467. rss_stats=rss_items,
  468. report_mode=ai_mode,
  469. report_type=ai_report_type,
  470. platforms=platforms,
  471. keywords=keywords,
  472. standalone_data=standalone_data,
  473. )
  474. # 设置 AI 分析使用的模式
  475. if result.success:
  476. result.ai_mode = ai_mode
  477. if result.error:
  478. # 成功但有警告(如 JSON 解析问题但使用了原始文本)
  479. print(f"[AI] 分析完成(有警告: {result.error})")
  480. else:
  481. print("[AI] 分析完成")
  482. # 记录 AI 分析
  483. if schedule.once_analyze and schedule.period_key:
  484. scheduler = self.ctx.create_scheduler()
  485. date_str = self.ctx.format_date()
  486. scheduler.record_execution(schedule.period_key, "analyze", date_str)
  487. elif result.skipped:
  488. print(f"[AI] {result.error}")
  489. else:
  490. print(f"[AI] 分析失败: {result.error}")
  491. return result
  492. except Exception as e:
  493. import traceback
  494. error_type = type(e).__name__
  495. error_msg = str(e)
  496. # 截断过长的错误消息
  497. if len(error_msg) > 200:
  498. error_msg = error_msg[:200] + "..."
  499. print(f"[AI] 分析出错 ({error_type}): {error_msg}")
  500. # 详细错误日志到 stderr
  501. import sys
  502. print(f"[AI] 详细错误堆栈:", file=sys.stderr)
  503. traceback.print_exc(file=sys.stderr)
  504. return AIAnalysisResult(success=False, error=f"{error_type}: {error_msg}")
  505. def _load_analysis_data(
  506. self,
  507. quiet: bool = False,
  508. ) -> Optional[Tuple[Dict, Dict, Dict, Dict, List, List]]:
  509. """统一的数据加载和预处理,使用当前监控平台列表过滤历史数据"""
  510. try:
  511. # 获取当前配置的监控平台ID列表
  512. current_platform_ids = self.ctx.platform_ids
  513. if not quiet:
  514. print(f"当前监控平台: {current_platform_ids}")
  515. all_results, id_to_name, title_info = self.ctx.read_today_titles(
  516. current_platform_ids, quiet=quiet
  517. )
  518. if not all_results:
  519. print("没有找到当天的数据")
  520. return None
  521. total_titles = sum(len(titles) for titles in all_results.values())
  522. if not quiet:
  523. print(f"读取到 {total_titles} 个标题(已按当前监控平台过滤)")
  524. new_titles = self.ctx.detect_new_titles(current_platform_ids, quiet=quiet)
  525. word_groups, filter_words, global_filters = self.ctx.load_frequency_words(self.frequency_file)
  526. return (
  527. all_results,
  528. id_to_name,
  529. title_info,
  530. new_titles,
  531. word_groups,
  532. filter_words,
  533. global_filters,
  534. )
  535. except Exception as e:
  536. print(f"数据加载失败: {e}")
  537. return None
  538. def _prepare_current_title_info(self, results: Dict, time_info: str) -> Dict:
  539. """从当前抓取结果构建标题信息"""
  540. title_info = {}
  541. for source_id, titles_data in results.items():
  542. title_info[source_id] = {}
  543. for title, title_data in titles_data.items():
  544. ranks = title_data.get("ranks", [])
  545. url = title_data.get("url", "")
  546. mobile_url = title_data.get("mobileUrl", "")
  547. title_info[source_id][title] = {
  548. "first_time": time_info,
  549. "last_time": time_info,
  550. "count": 1,
  551. "ranks": ranks,
  552. "url": url,
  553. "mobileUrl": mobile_url,
  554. }
  555. return title_info
  556. def _prepare_standalone_data(
  557. self,
  558. results: Dict,
  559. id_to_name: Dict,
  560. title_info: Optional[Dict] = None,
  561. rss_items: Optional[List[Dict]] = None,
  562. ) -> Optional[Dict]:
  563. """
  564. 从原始数据中提取独立展示区数据
  565. 纯数据准备方法,不检查 display.regions.standalone 开关。
  566. 各消费者自行决定是否使用:
  567. - AI 分析:由 ai.include_standalone 控制
  568. - 通知推送:由 display.regions.standalone 控制(在 dispatcher 层门控)
  569. - HTML 报告:始终包含(如果有数据)
  570. Args:
  571. results: 原始爬取结果 {platform_id: {title: title_data}}
  572. id_to_name: 平台 ID 到名称的映射
  573. title_info: 标题元信息(含排名历史、时间等)
  574. rss_items: RSS 条目列表
  575. Returns:
  576. 独立展示数据字典,如果未配置数据源返回 None
  577. """
  578. display_config = self.ctx.config.get("DISPLAY", {})
  579. standalone_config = display_config.get("STANDALONE", {})
  580. platform_ids = standalone_config.get("PLATFORMS", [])
  581. rss_feed_ids = standalone_config.get("RSS_FEEDS", [])
  582. max_items = standalone_config.get("MAX_ITEMS", 20)
  583. if not platform_ids and not rss_feed_ids:
  584. return None
  585. standalone_data = {
  586. "platforms": [],
  587. "rss_feeds": [],
  588. }
  589. # 找出最新批次时间(类似 current 模式的过滤逻辑)
  590. latest_time = None
  591. if title_info:
  592. for source_titles in title_info.values():
  593. for title_data in source_titles.values():
  594. last_time = title_data.get("last_time", "")
  595. if last_time:
  596. if latest_time is None or last_time > latest_time:
  597. latest_time = last_time
  598. # 提取热榜平台数据
  599. for platform_id in platform_ids:
  600. if platform_id not in results:
  601. continue
  602. platform_name = id_to_name.get(platform_id, platform_id)
  603. platform_titles = results[platform_id]
  604. items = []
  605. for title, title_data in platform_titles.items():
  606. # 获取元信息(如果有 title_info)
  607. meta = {}
  608. if title_info and platform_id in title_info and title in title_info[platform_id]:
  609. meta = title_info[platform_id][title]
  610. # 只保留当前在榜的话题(last_time 等于最新时间)
  611. if latest_time and meta:
  612. if meta.get("last_time") != latest_time:
  613. continue
  614. # 使用当前热榜的排名数据(title_data)进行排序
  615. # title_data 包含的是爬虫返回的当前排名,用于保证独立展示区的顺序与热榜一致
  616. current_ranks = title_data.get("ranks", [])
  617. current_rank = current_ranks[-1] if current_ranks else 0
  618. # 用于显示的排名范围:合并历史排名和当前排名
  619. historical_ranks = meta.get("ranks", []) if meta else []
  620. # 合并去重,保持顺序
  621. all_ranks = historical_ranks.copy()
  622. for rank in current_ranks:
  623. if rank not in all_ranks:
  624. all_ranks.append(rank)
  625. display_ranks = all_ranks if all_ranks else current_ranks
  626. item = {
  627. "title": title,
  628. "url": title_data.get("url", ""),
  629. "mobileUrl": title_data.get("mobileUrl", ""),
  630. "rank": current_rank, # 用于排序的当前排名
  631. "ranks": display_ranks, # 用于显示的排名范围(历史+当前)
  632. "first_time": meta.get("first_time", ""),
  633. "last_time": meta.get("last_time", ""),
  634. "count": meta.get("count", 1),
  635. "rank_timeline": meta.get("rank_timeline", []),
  636. }
  637. items.append(item)
  638. # 按当前排名排序
  639. items.sort(key=lambda x: x["rank"] if x["rank"] > 0 else 9999)
  640. # 限制条数
  641. if max_items > 0:
  642. items = items[:max_items]
  643. if items:
  644. standalone_data["platforms"].append({
  645. "id": platform_id,
  646. "name": platform_name,
  647. "items": items,
  648. })
  649. # 提取 RSS 数据
  650. if rss_items and rss_feed_ids:
  651. # 按 feed_id 分组
  652. feed_items_map = {}
  653. for item in rss_items:
  654. feed_id = item.get("feed_id", "")
  655. if feed_id in rss_feed_ids:
  656. if feed_id not in feed_items_map:
  657. feed_items_map[feed_id] = {
  658. "name": item.get("feed_name", feed_id),
  659. "items": [],
  660. }
  661. feed_items_map[feed_id]["items"].append({
  662. "title": item.get("title", ""),
  663. "url": item.get("url", ""),
  664. "published_at": item.get("published_at", ""),
  665. "author": item.get("author", ""),
  666. })
  667. # 限制条数并添加到结果
  668. for feed_id in rss_feed_ids:
  669. if feed_id in feed_items_map:
  670. feed_data = feed_items_map[feed_id]
  671. items = feed_data["items"]
  672. if max_items > 0:
  673. items = items[:max_items]
  674. if items:
  675. standalone_data["rss_feeds"].append({
  676. "id": feed_id,
  677. "name": feed_data["name"],
  678. "items": items,
  679. })
  680. # 如果没有任何数据,返回 None
  681. if not standalone_data["platforms"] and not standalone_data["rss_feeds"]:
  682. return None
  683. return standalone_data
  def _run_analysis_pipeline(
      self,
      data_source: Dict,
      mode: str,
      title_info: Dict,
      new_titles: Dict,
      word_groups: List[Dict],
      filter_words: List[str],
      id_to_name: Dict,
      failed_ids: Optional[List] = None,
      global_filters: Optional[List[str]] = None,
      quiet: bool = False,
      rss_items: Optional[List[Dict]] = None,
      rss_new_items: Optional[List[Dict]] = None,
      standalone_data: Optional[Dict] = None,
      schedule: ResolvedSchedule = None,
      rss_new_urls: Optional[set] = None,
  ) -> Tuple[List[Dict], Optional[str], Optional[AIAnalysisResult], Optional[List[Dict]]]:
      """Run the shared pipeline: filtering -> AI analysis -> translation -> HTML.

      Filtering uses the AI filter when ``self.filter_method == "ai"`` and
      falls back to keyword matching when the AI filter does not succeed;
      every other filter method goes straight to keyword matching.

      Returns:
          ``(stats, html_file, ai_result, rss_items)``. ``html_file`` is None
          when HTML output is disabled, ``ai_result`` is None when AI analysis
          is disabled or ``stats`` is empty, and ``rss_items`` may have been
          replaced by the AI filter's RSS statistics and/or translated.
      """
      use_keyword_matching = True

      if self.filter_method == "ai":
          # AI filtering strategy
          print("[筛选] 使用 AI 智能筛选策略")
          ai_filter_result = self.ctx.run_ai_filter(interests_file=self.interests_file)

          if ai_filter_result and ai_filter_result.success:
              print(f"[筛选] AI 筛选完成: {ai_filter_result.total_matched} 条匹配, {len(ai_filter_result.tags)} 个标签")
              # Convert to the same structure that keyword matching produces.
              stats, ai_rss_stats = self.ctx.convert_ai_filter_to_report_data(
                  ai_filter_result,
                  mode=mode,
                  new_titles=new_titles,
                  rss_new_urls=rss_new_urls,
              )
              total_titles = sum(map(len, data_source.values()))
              # RSS results from the AI filter replace the keyword-based ones.
              if ai_rss_stats:
                  rss_items = ai_rss_stats
              use_keyword_matching = False
          else:
              # AI filtering failed: fall through to keyword matching below.
              error_msg = ai_filter_result.error if ai_filter_result else "未知错误"
              print(f"[筛选] AI 筛选失败: {error_msg},回退到关键词匹配")

      if use_keyword_matching:
          # Keyword matching strategy (default, and the AI fallback)
          stats, total_titles = self.ctx.count_frequency(
              data_source,
              word_groups,
              filter_words,
              id_to_name,
              title_info,
              new_titles,
              mode=mode,
              global_filters=global_filters,
              quiet=quiet,
          )

      # Platform display mode needs the stats regrouped per platform.
      if self.ctx.display_mode == "platform" and stats:
          stats = convert_keyword_stats_to_platform_stats(
              stats,
              self.ctx.weight_config,
              self.ctx.rank_threshold,
          )

      # AI analysis (when enabled) feeds the HTML report.
      ai_result = None
      if self.ctx.config.get("AI_ANALYSIS", {}).get("ENABLED", False) and stats:
          # The mode strategy determines the report type.
          report_type = self._get_mode_strategy()["report_type"]
          ai_result = self._run_ai_analysis(
              stats, rss_items, mode, report_type, id_to_name,
              current_results=data_source, schedule=schedule,
              standalone_data=standalone_data,
          )

      # Translate RSS content before HTML generation so the web report shows
      # it too. Only rss_items / rss_new_items are translated here; an empty
      # report_data is passed and its translated result is discarded.
      if self.ctx.config.get("AI_TRANSLATION", {}).get("ENABLED", False):
          dispatcher = self.ctx.create_notification_dispatcher()
          display_regions = self.ctx.config.get("DISPLAY", {}).get("REGIONS", {})
          translated = dispatcher.translate_content(
              report_data={"stats": [], "new_titles": []},
              rss_items=rss_items,
              rss_new_items=rss_new_items,
              display_regions=display_regions,
          )
          _, rss_items, rss_new_items, _ = translated

      # HTML generation (when enabled) uses the translated data.
      html_file = None
      if self.ctx.config["STORAGE"]["FORMATS"]["HTML"]:
          if self.ctx.config["SHOW_VERSION_UPDATE"]:
              update_info = self.update_info
          else:
              update_info = None
          html_file = self.ctx.generate_html(
              stats,
              total_titles,
              failed_ids=failed_ids,
              new_titles=new_titles,
              id_to_name=id_to_name,
              mode=mode,
              update_info=update_info,
              rss_items=rss_items,
              rss_new_items=rss_new_items,
              ai_analysis=ai_result,
              standalone_data=standalone_data,
              frequency_file=self.frequency_file,
          )

      return stats, html_file, ai_result, rss_items
  def _send_notification_if_needed(
      self,
      stats: List[Dict],
      report_type: str,
      mode: str,
      failed_ids: Optional[List] = None,
      new_titles: Optional[Dict] = None,
      id_to_name: Optional[Dict] = None,
      html_file_path: Optional[str] = None,
      rss_items: Optional[List[Dict]] = None,
      rss_new_items: Optional[List[Dict]] = None,
      standalone_data: Optional[Dict] = None,
      ai_result: Optional[AIAnalysisResult] = None,
      current_results: Optional[Dict] = None,
      schedule: ResolvedSchedule = None,
  ) -> bool:
      """Decide whether to push a notification and dispatch it if so.

      Covers hot-list and RSS content in one push, plus the optional AI
      analysis and the standalone display section.

      Args:
          stats: Hot-list statistics; each entry's ``titles`` list is counted.
          report_type: Label of the report, also used in the skip message.
          mode: Report mode forwarded to report preparation and dispatch.
          rss_items: RSS statistics; each entry's ``count`` is summed.
          ai_result: Analysis produced earlier; when None and AI analysis is
              enabled, the analysis is run here instead.
          schedule: Resolved schedule; its ``push``, ``once_push``,
              ``period_key`` and ``period_name`` attributes are read.

      Returns:
          True when the dispatcher returned a non-empty result mapping (even
          if every channel reported failure); False when the push was skipped.
      """
      has_notification = self._has_notification_configured()
      cfg = self.ctx.config

      # Is there any valid content (hot list or RSS)?
      has_news_content = self._has_valid_content(stats, new_titles)
      has_rss_content = bool(rss_items)

      # Guard clauses: each skip reason is reported once and ends the call.
      if not cfg["ENABLE_NOTIFICATION"]:
          print(f"跳过{report_type}通知:通知功能已禁用")
          return False

      if not has_notification:
          print("⚠️ 警告:通知功能已启用但未配置任何通知渠道,将跳过通知发送")
          return False

      if not (has_news_content or has_rss_content):
          # With no content at all there is by definition no RSS content, so
          # incremental mode has exactly one possible message here.
          if self.report_mode == "incremental":
              print("跳过通知:增量模式下未检测到匹配的新闻和RSS")
          else:
              mode_strategy = self._get_mode_strategy()
              print(
                  f"跳过通知:{mode_strategy['mode_name']}下未检测到匹配的新闻"
              )
          return False

      # Report what is about to be pushed.
      news_count = sum(len(stat.get("titles", [])) for stat in stats) if stats else 0
      rss_count = sum(stat.get("count", 0) for stat in rss_items) if rss_items else 0
      content_parts = []
      if news_count > 0:
          content_parts.append(f"热榜 {news_count} 条")
      if rss_count > 0:
          content_parts.append(f"RSS {rss_count} 条")
      total_count = news_count + rss_count
      print(f"[推送] 准备发送:{' + '.join(content_parts)},合计 {total_count} 条")

      # Scheduler decision.
      if not schedule.push:
          print("[推送] 调度器: 当前时间段不执行推送")
          return False

      if schedule.once_push and schedule.period_key:
          scheduler = self.ctx.create_scheduler()
          date_str = self.ctx.format_date()
          if scheduler.already_executed(schedule.period_key, "push", date_str):
              print(f"[推送] 调度器: 时间段 {schedule.period_name or schedule.period_key} 今天已推送过,跳过")
              return False
          print(f"[推送] 调度器: 时间段 {schedule.period_name or schedule.period_key} 今天首次推送")

      # AI analysis: reuse the result passed in to avoid analysing twice.
      if ai_result is None:
          ai_config = cfg.get("AI_ANALYSIS", {})
          if ai_config.get("ENABLED", False):
              ai_result = self._run_ai_analysis(
                  stats, rss_items, mode, report_type, id_to_name,
                  current_results=current_results, schedule=schedule
              )

      # Prepare the report payload.
      report_data = self.ctx.prepare_report(
          stats, failed_ids, new_titles, id_to_name, mode,
          frequency_file=self.frequency_file,
      )

      # Include version-update information only when configured.
      update_info_to_send = self.update_info if cfg["SHOW_VERSION_UPDATE"] else None

      # Dispatch to all channels; skip_translation=True is passed because the
      # RSS data was already translated in the analysis pipeline.
      dispatcher = self.ctx.create_notification_dispatcher()
      results = dispatcher.dispatch_all(
          report_data=report_data,
          report_type=report_type,
          update_info=update_info_to_send,
          proxy_url=self.proxy_url,
          mode=mode,
          html_file_path=html_file_path,
          rss_items=rss_items,
          rss_new_items=rss_new_items,
          ai_analysis=ai_result,
          standalone_data=standalone_data,
          skip_translation=True,
      )

      if not results:
          print("未配置任何通知渠道,跳过通知发送")
          return False

      # Record the push only when at least one channel succeeded.
      if any(results.values()):
          if schedule.once_push and schedule.period_key:
              scheduler = self.ctx.create_scheduler()
              date_str = self.ctx.format_date()
              scheduler.record_execution(schedule.period_key, "push", date_str)

      return True
  def _initialize_and_check_config(self) -> bool:
      """Print the start-up banner and check the basic switches.

      Returns:
          False when the crawler is disabled (the run should stop), True
          otherwise.
      """
      timestamp = self.ctx.get_time().strftime('%Y-%m-%d %H:%M:%S')
      print(f"当前北京时间: {timestamp}")

      if not self.ctx.config["ENABLE_CRAWLER"]:
          print("爬虫功能已禁用(ENABLE_CRAWLER=False),程序退出")
          return False

      # Channel configuration is queried before the notification switch is read.
      has_notification = self._has_notification_configured()
      if not self.ctx.config["ENABLE_NOTIFICATION"]:
          notice = "通知功能已禁用(ENABLE_NOTIFICATION=False),将只进行数据抓取"
      elif not has_notification:
          notice = "未配置任何通知渠道,将只进行数据抓取,不发送通知"
      else:
          notice = "通知功能已启用,将发送通知"
      print(notice)

      mode_strategy = self._get_mode_strategy()
      print(f"报告模式: {self.report_mode}")
      print(f"运行模式: {mode_strategy['description']}")
      return True
  def _crawl_data(self) -> Tuple[Dict, Dict, List]:
      """Crawl the configured platforms and persist the results.

      Returns:
          ``(results, id_to_name, failed_ids)`` as returned by the data
          fetcher's ``crawl_websites``.
      """
      platforms = self.ctx.platforms
      # A platform with a display name is passed as an (id, name) pair,
      # otherwise as the bare id.
      ids = [
          (entry["id"], entry["name"]) if "name" in entry else entry["id"]
          for entry in platforms
      ]

      print(
          f"配置的监控平台: {[p.get('name', p['id']) for p in platforms]}"
      )
      print(f"开始爬取数据,请求间隔 {self.request_interval} 毫秒")
      Path("output").mkdir(parents=True, exist_ok=True)

      crawl_outcome = self.data_fetcher.crawl_websites(ids, self.request_interval)
      results, id_to_name, failed_ids = crawl_outcome

      # Convert to the NewsData format before handing over to storage.
      crawl_time = self.ctx.format_time()
      crawl_date = self.ctx.format_date()
      news_data = convert_crawl_results_to_news_data(
          results, id_to_name, failed_ids, crawl_time, crawl_date
      )

      # Save to the storage backend.
      saved = self.storage_manager.save_news_data(news_data)
      if saved:
          print(f"数据已保存到存储后端: {self.storage_manager.backend_name}")

      # Save a TXT snapshot; a falsy return value means nothing to report.
      txt_file = self.storage_manager.save_txt_snapshot(news_data)
      if txt_file:
          print(f"TXT 快照已保存: {txt_file}")

      return results, id_to_name, failed_ids
  def _crawl_rss_data(self) -> Tuple[Optional[List[Dict]], Optional[List[Dict]], Optional[List[Dict]], set]:
      """Fetch all enabled RSS feeds, store them and process them by mode.

      Returns:
          ``(rss_items, rss_new_items, raw_rss_items, rss_new_urls)``:
          - rss_items: statistics entries for the statistics section
          - rss_new_items: entries for the "new" section
          - raw_rss_items: raw RSS entries for the standalone display section
          - rss_new_urls: URLs of raw new entries (AI-mode is_new detection)
          ``(None, None, None, set())`` when RSS is disabled, no usable feed
          is configured, saving fails, or any error occurs.
      """
      def no_data():
          # A fresh tuple (and set) per call so callers never share state.
          return None, None, None, set()

      if not self.ctx.rss_enabled:
          return no_data()

      rss_feeds = self.ctx.rss_feeds
      if not rss_feeds:
          print("[RSS] 未配置任何 RSS 源")
          return no_data()

      try:
          from trendradar.crawler.rss import RSSFetcher, RSSFeedConfig

          # Build the per-feed configuration objects.
          feeds = []
          for feed_config in rss_feeds:
              # Optional per-feed max_age_days; invalid values become None.
              max_age_days = None
              raw_age = feed_config.get("max_age_days")
              if raw_age is not None:
                  try:
                      max_age_days = int(raw_age)
                  except (ValueError, TypeError):
                      feed_id = feed_config.get("id", "unknown")
                      print(f"[警告] RSS feed '{feed_id}' 的 max_age_days 格式错误:{raw_age}")
                      max_age_days = None
                  else:
                      if max_age_days < 0:
                          feed_id = feed_config.get("id", "unknown")
                          print(f"[警告] RSS feed '{feed_id}' 的 max_age_days 为负数,将使用全局默认值")
                          max_age_days = None

              feed = RSSFeedConfig(
                  id=feed_config.get("id", ""),
                  name=feed_config.get("name", ""),
                  url=feed_config.get("url", ""),
                  max_items=feed_config.get("max_items", 50),
                  enabled=feed_config.get("enabled", True),
                  max_age_days=max_age_days,  # None=use global, 0=disabled, >0=override
              )
              if feed.id and feed.url and feed.enabled:
                  feeds.append(feed)

          if not feeds:
              print("[RSS] 没有启用的 RSS 源")
              return no_data()

          rss_config = self.ctx.rss_config
          # Proxy: the RSS-specific proxy wins, then the crawler default.
          rss_proxy_url = rss_config.get("PROXY_URL", "") or self.proxy_url or ""
          timezone = self.ctx.config.get("TIMEZONE", DEFAULT_TIMEZONE)
          # Freshness filter settings.
          freshness_config = rss_config.get("FRESHNESS_FILTER", {})

          fetcher = RSSFetcher(
              feeds=feeds,
              request_interval=rss_config.get("REQUEST_INTERVAL", 2000),
              timeout=rss_config.get("TIMEOUT", 15),
              use_proxy=rss_config.get("USE_PROXY", False),
              proxy_url=rss_proxy_url,
              timezone=timezone,
              freshness_enabled=freshness_config.get("ENABLED", True),
              default_max_age_days=freshness_config.get("MAX_AGE_DAYS", 3),
          )

          rss_data = fetcher.fetch_all()

          # Persist first; only stored data is processed further.
          if not self.storage_manager.save_rss_data(rss_data):
              print("[RSS] 数据保存失败")
              return no_data()

          print("[RSS] 数据已保存到存储后端")
          # Process by report mode and return the data for the merged push.
          return self._process_rss_data_by_mode(rss_data)

      except ImportError as e:
          print(f"[RSS] 缺少依赖: {e}")
          print("[RSS] 请安装 feedparser: pip install feedparser")
          return no_data()
      except Exception as e:
          print(f"[RSS] 抓取失败: {e}")
          return no_data()
  def _process_rss_data_by_mode(self, rss_data) -> Tuple[Optional[List[Dict]], Optional[List[Dict]], Optional[List[Dict]], set]:
      """Process RSS data according to the report mode.

      Modes:
      - daily: statistics = all stored entries for the day, new = new entries
      - current: statistics = latest stored entries, new = new entries
      - incremental: statistics = new entries only, no separate "new" list

      Args:
          rss_data: The RSS data object from the current fetch; its ``date``
              and ``id_to_name`` attributes are read.

      Returns:
          ``(rss_stats, rss_new_stats, raw_rss_items, rss_new_urls)``:
          - rss_stats: keyword statistics, or None when nothing matched
          - rss_new_stats: keyword statistics of new entries, or None
          - raw_rss_items: raw entries for the standalone display section
          - rss_new_urls: URLs of new entries before keyword filtering
      """
      from trendradar.core.analyzer import count_rss_frequency

      # display.regions.rss switches RSS analysis and display on or off.
      rss_display_enabled = self.ctx.config.get("DISPLAY", {}).get("REGIONS", {}).get("RSS", True)

      # Load the keyword configuration; a missing file means "no rules".
      try:
          word_groups, filter_words, global_filters = self.ctx.load_frequency_words(self.frequency_file)
      except FileNotFoundError:
          word_groups, filter_words, global_filters = [], [], []

      timezone = self.ctx.timezone
      max_news_per_keyword = self.ctx.config.get("MAX_NEWS_PER_KEYWORD", 0)
      sort_by_position_first = self.ctx.config.get("SORT_BY_POSITION_FIRST", False)

      storage = self.storage_manager
      report_mode = self.report_mode
      rss_new_stats = None
      raw_rss_items = None  # raw entries for the standalone display section
      rss_new_urls = set()  # URLs of new entries, before keyword filtering

      # 1. Raw entries first: they feed the standalone section regardless
      #    of display.regions.rss.
      if report_mode == "incremental":
          fresh_entries = storage.detect_new_rss_items(rss_data)
          if fresh_entries:
              raw_rss_items = self._convert_rss_items_to_list(fresh_entries, rss_data.id_to_name)
      else:
          if report_mode == "current":
              stored = storage.get_latest_rss_data(rss_data.date)
          else:  # daily
              stored = storage.get_rss_data(rss_data.date)
          if stored:
              raw_rss_items = self._convert_rss_items_to_list(stored.items, stored.id_to_name)

      # RSS display disabled: skip keyword analysis, return raw entries only.
      if not rss_display_enabled:
          return None, None, raw_rss_items, rss_new_urls

      # 2. New entries (used for statistics and for marking entries as new).
      new_items_list = None
      new_items_dict = storage.detect_new_rss_items(rss_data)
      if new_items_dict:
          new_items_list = self._convert_rss_items_to_list(new_items_dict, rss_data.id_to_name)
      if new_items_list:
          print(f"[RSS] 检测到 {len(new_items_list)} 条新增")
          rss_new_urls = {entry["url"] for entry in new_items_list if entry.get("url")}

      def keyword_stats(entries, quiet):
          # All calls share the same settings; only the input entries and
          # the verbosity differ.
          matched, _total = count_rss_frequency(
              rss_items=entries,
              word_groups=word_groups,
              filter_words=filter_words,
              global_filters=global_filters,
              new_items=new_items_list,
              max_news_per_keyword=max_news_per_keyword,
              sort_by_position_first=sort_by_position_first,
              timezone=timezone,
              rank_threshold=self.rank_threshold,
              quiet=quiet,
          )
          return matched

      # 3. Statistics by mode.
      if report_mode == "incremental":
          # Incremental: the statistics entries are the new entries.
          if not new_items_list:
              print("[RSS] 增量模式:没有新增 RSS 条目")
              return None, None, raw_rss_items, rss_new_urls

          rss_stats = keyword_stats(new_items_list, quiet=False)
          if not rss_stats:
              print("[RSS] 增量模式:关键词匹配后没有内容")
              # Raw entries are still returned for the standalone section.
              return None, None, raw_rss_items, rss_new_urls
          return rss_stats, rss_new_stats, raw_rss_items, rss_new_urls

      # current / daily: statistics over the raw entries fetched in step 1.
      mode_label = "当前榜单模式" if report_mode == "current" else "当日汇总模式"
      if not raw_rss_items:
          print(f"[RSS] {mode_label}:没有 RSS 数据")
          return None, None, None, rss_new_urls

      rss_stats = keyword_stats(raw_rss_items, quiet=False)
      if not rss_stats:
          print(f"[RSS] {mode_label}:关键词匹配后没有内容")
          # Raw entries are still returned for the standalone section.
          return None, None, raw_rss_items, rss_new_urls

      # Statistics restricted to the new entries.
      if new_items_list:
          rss_new_stats = keyword_stats(new_items_list, quiet=True)

      return rss_stats, rss_new_stats, raw_rss_items, rss_new_urls
  def _convert_rss_items_to_list(self, items_dict: Dict, id_to_name: Dict) -> List[Dict]:
      """Flatten per-feed RSS entries into a list, applying the freshness filter.

      Args:
          items_dict: Mapping of feed id to an iterable of entry objects
              exposing ``title``, ``url``, ``published_at``, ``summary`` and
              ``author``.
          id_to_name: Mapping of feed id to display name; the id itself is
              used when a feed has no entry.

      Returns:
          A list of dicts with keys ``title``, ``feed_id``, ``feed_name``,
          ``url``, ``published_at``, ``summary`` and ``author``. Entries with
          a publication time older than the feed's limit are left out;
          entries without a publication time are always kept.
      """
      rss_items = []
      filtered_count = 0
      filtered_details = []  # collected only in DEBUG mode

      # Freshness filter settings.
      rss_config = self.ctx.rss_config
      freshness_config = rss_config.get("FRESHNESS_FILTER", {})
      freshness_enabled = freshness_config.get("ENABLED", True)
      default_max_age_days = freshness_config.get("MAX_AGE_DAYS", 3)
      tz_name = self.ctx.config.get("TIMEZONE", DEFAULT_TIMEZONE)
      debug_mode = self.ctx.config.get("DEBUG", False)

      # feed_id -> max_age_days override. Unparseable and negative values
      # are ignored so the feed falls back to the global default, matching
      # how the fetch step treats the same setting; previously a negative
      # value silently disabled the filter for that feed.
      feed_max_age_map = {}
      for feed_cfg in self.ctx.rss_feeds:
          max_age = feed_cfg.get("max_age_days")
          if max_age is None:
              continue
          try:
              parsed_age = int(max_age)
          except (ValueError, TypeError):
              continue
          if parsed_age >= 0:
              feed_max_age_map[feed_cfg.get("id", "")] = parsed_age

      for feed_id, items in items_dict.items():
          max_days = feed_max_age_map.get(feed_id, default_max_age_days)
          feed_name = id_to_name.get(feed_id, feed_id)
          # A limit of 0 disables filtering for this feed.
          filter_active = freshness_enabled and max_days > 0

          for item in items:
              if (
                  filter_active
                  and item.published_at
                  and not is_within_days(item.published_at, max_days, tz_name)
              ):
                  filtered_count += 1
                  if debug_mode:
                      filtered_details.append({
                          "title": item.title[:50] + "..." if len(item.title) > 50 else item.title,
                          "feed": feed_name,
                          "days_old": calculate_days_old(item.published_at, tz_name),
                          "max_days": max_days,
                      })
                  continue  # skip entries older than the limit

              rss_items.append({
                  "title": item.title,
                  "feed_id": feed_id,
                  "feed_name": feed_name,
                  "url": item.url,
                  "published_at": item.published_at,
                  "summary": item.summary,
                  "author": item.author,
              })

      # Report how many entries the filter dropped.
      if filtered_count > 0:
          print(f"[RSS] 新鲜度过滤:跳过 {filtered_count} 篇超过指定天数的旧文章(仍保留在数据库中)")
          if debug_mode and filtered_details:
              print(f"[RSS] 被过滤的文章详情(共 {len(filtered_details)} 篇):")
              for detail in filtered_details[:10]:  # show at most 10
                  # Only a missing age is "unknown"; 0.0 is a real value.
                  days_old = detail["days_old"]
                  days_str = f"{days_old:.1f}" if days_old is not None else "未知"
                  print(f" - [{days_str}天前] [{detail['feed']}] {detail['title']} (限制: {detail['max_days']}天)")
              if len(filtered_details) > 10:
                  print(f" ... 还有 {len(filtered_details) - 10} 篇被过滤")

      return rss_items
  def _filter_rss_by_keywords(self, rss_items: List[Dict]) -> List[Dict]:
      """Keep only the RSS entries whose title matches the keyword file.

      The input list is returned unchanged when the keyword file is missing
      or defines no rules at all; an empty list is returned when rules
      exist but nothing matches.
      """
      try:
          word_groups, filter_words, global_filters = self.ctx.load_frequency_words(self.frequency_file)
          if not (word_groups or filter_words or global_filters):
              return rss_items

          from trendradar.core.frequency import matches_word_groups

          matched = [
              entry
              for entry in rss_items
              if matches_word_groups(entry.get("title", ""), word_groups, filter_words, global_filters)
          ]
          print(f"[RSS] 关键词过滤后剩余 {len(matched)}/{len(rss_items)} 条")

          if not matched:
              print("[RSS] 关键词过滤后没有匹配内容")
              return []
          return matched
      except FileNotFoundError:
          # No keyword file: skip filtering.
          return rss_items
  def _generate_rss_html_report(self, rss_items: list, feeds_info: dict) -> Optional[str]:
      """Render the RSS HTML report and write it under ``output/html/<date>/``.

      Args:
          rss_items: RSS entries to render; its length is passed as the total.
          feeds_info: Feed metadata forwarded to the renderer.

      Returns:
          The path of the written file as a string, or None when rendering
          or writing fails (the error is printed, not raised).
      """
      try:
          from trendradar.report.rss_html import render_rss_html_content
          from pathlib import Path

          html_content = render_rss_html_content(
              rss_items=rss_items,
              total_count=len(rss_items),
              feeds_info=feeds_info,
              get_time_func=self.ctx.get_time,
          )

          # Flat layout: output/html/<date>/rss_<time>.html
          date_folder = self.ctx.format_date()
          time_filename = self.ctx.format_time()
          output_dir = Path("output") / "html" / date_folder
          output_dir.mkdir(parents=True, exist_ok=True)

          file_path = output_dir / f"rss_{time_filename}.html"
          file_path.write_text(html_content, encoding="utf-8")

          print(f"[RSS] HTML 报告已生成: {file_path}")
          return str(file_path)

      except Exception as e:
          # Best effort: a failed report must not abort the run.
          print(f"[RSS] 生成 HTML 报告失败: {e}")
          return None
  def _execute_mode_strategy(
      self, mode_strategy: Dict, results: Dict, id_to_name: Dict, failed_ids: List,
      rss_items: Optional[List[Dict]] = None,
      rss_new_items: Optional[List[Dict]] = None,
      raw_rss_items: Optional[List[Dict]] = None,
      rss_new_urls: Optional[set] = None,
  ) -> Optional[str]:
      """Run the mode-specific analysis and the merged hot-list + RSS push.

      The resolved schedule may override the report mode, keyword file,
      filter method and interests file on ``self`` before anything runs.

      Returns:
          The HTML report path from the analysis pipeline, or None when the
          schedule says not to collect or no HTML was generated.

      Raises:
          RuntimeError: In "current" mode when the stored analysis data
              cannot be loaded.
      """
      # Scheduling
      schedule = self.ctx.create_scheduler().resolve()

      # The scheduled report_mode overrides the global configuration.
      if schedule.report_mode != self.report_mode:
          print(f"[调度] 报告模式覆盖: {self.report_mode} -> {schedule.report_mode}")
          self.report_mode = schedule.report_mode
          # Refresh the strategy so report_type matches the new mode.
          mode_strategy = self._get_mode_strategy()

      # Scheduled keyword file, filter method and interests file.
      self.frequency_file = schedule.frequency_file
      self.filter_method = schedule.filter_method or self.ctx.filter_method
      self.interests_file = schedule.interests_file

      # Nothing to do when the scheduler disables collection.
      if not schedule.collect:
          print("[调度] 当前时间段不执行数据采集,跳过分析流水线")
          return None

      new_titles = self.ctx.detect_new_titles(self.ctx.platform_ids)
      time_info = self.ctx.format_time()
      word_groups, filter_words, global_filters = self.ctx.load_frequency_words(self.frequency_file)

      # "current" and "daily" analyse stored history. "current" requires
      # it; "daily" falls back to the data crawled in this run.
      history = None
      if self.report_mode in ("current", "daily"):
          history = self._load_analysis_data()
          if not history and self.report_mode == "current":
              print("❌ 严重错误:无法读取刚保存的数据文件")
              raise RuntimeError("数据一致性检查失败:保存后立即读取失败")

      if history:
          (
              all_results,
              historical_id_to_name,
              historical_title_info,
              historical_new_titles,
              _,
              _,
              _,
          ) = history
          if self.report_mode == "current":
              print(
                  f"current模式:使用过滤后的历史数据,包含平台:{list(all_results.keys())}"
              )
          pipeline_source = all_results
          pipeline_names = historical_id_to_name
          title_info = historical_title_info
          pipeline_new_titles = historical_new_titles
      else:
          # incremental mode, or daily mode without stored history.
          title_info = self._prepare_current_title_info(results, time_info)
          pipeline_source = results
          pipeline_names = id_to_name
          pipeline_new_titles = new_titles

      # Standalone section data for the HTML report.
      standalone_data = self._prepare_standalone_data(
          pipeline_source, pipeline_names, title_info, raw_rss_items
      )

      stats, html_file, ai_result, rss_items = self._run_analysis_pipeline(
          pipeline_source,
          self.report_mode,
          title_info,
          pipeline_new_titles,
          word_groups,
          filter_words,
          pipeline_names,
          failed_ids=failed_ids,
          global_filters=global_filters,
          rss_items=rss_items,
          rss_new_items=rss_new_items,
          standalone_data=standalone_data,
          schedule=schedule,
          rss_new_urls=rss_new_urls,
      )

      if history:
          # Later steps use the historical view; names crawled in this run
          # take precedence over historical ones.
          id_to_name = {**historical_id_to_name, **id_to_name}
          new_titles = historical_new_titles
          results = all_results

      if html_file:
          print(f"HTML报告已生成: {html_file}")
          print(f"最新报告已更新: output/html/latest/{self.report_mode}.html")

      # Send the notification; the standalone data is rebuilt for the push.
      if mode_strategy["should_send_notification"]:
          standalone_data = self._prepare_standalone_data(
              results, id_to_name, title_info, raw_rss_items
          )
          self._send_notification_if_needed(
              stats,
              mode_strategy["report_type"],
              self.report_mode,
              failed_ids=failed_ids,
              new_titles=new_titles,
              id_to_name=id_to_name,
              html_file_path=html_file,
              rss_items=rss_items,
              rss_new_items=rss_new_items,
              standalone_data=standalone_data,
              ai_result=ai_result,
              current_results=results,
              schedule=schedule,
          )

      # Open the report in a browser, or just report the path in Docker.
      if self._should_open_browser() and html_file:
          file_url = "file://" + str(Path(html_file).resolve())
          print(f"正在打开HTML报告: {file_url}")
          webbrowser.open(file_url)
      elif self.is_docker_container and html_file:
          print(f"HTML报告已生成(Docker环境): {html_file}")

      return html_file
  def run(self) -> None:
      """Execute the whole flow: checks, crawling, analysis and push.

      Errors are printed and swallowed unless the ``DEBUG`` config flag is
      set, in which case they are re-raised. Context cleanup always runs.
      """
      try:
          if self._initialize_and_check_config():
              mode_strategy = self._get_mode_strategy()

              # Crawl the hot-list data.
              hot_results, platform_names, failed = self._crawl_data()

              # Crawl RSS data: statistics, new, raw entries and new URLs.
              rss_bundle = self._crawl_rss_data()
              rss_items, rss_new_items, raw_rss_items, rss_new_urls = rss_bundle

              # Run the mode strategy with the RSS data for the merged push.
              self._execute_mode_strategy(
                  mode_strategy,
                  hot_results,
                  platform_names,
                  failed,
                  rss_items=rss_items,
                  rss_new_items=rss_new_items,
                  raw_rss_items=raw_rss_items,
                  rss_new_urls=rss_new_urls,
              )
      except Exception as e:
          print(f"分析流程执行出错: {e}")
          if self.ctx.config.get("DEBUG", False):
              raise
      finally:
          # Release resources held by the context.
          self.ctx.cleanup()
  1511. def _record_doctor_result(results: List[Tuple[str, str, str]], status: str, item: str, detail: str) -> None:
  1512. """记录并打印 doctor 检查结果"""
  1513. icon_map = {
  1514. "pass": "✅",
  1515. "warn": "⚠️",
  1516. "fail": "❌",
  1517. }
  1518. icon = icon_map.get(status, "•")
  1519. results.append((status, item, detail))
  1520. print(f"{icon} {item}: {detail}")
  def _save_doctor_report(
      results: List[Tuple[str, str, str]],
      pass_count: int,
      warn_count: int,
      fail_count: int,
      config_path: Optional[str],
  ) -> None:
      """Write the doctor report to ``output/meta/doctor_report.json``.

      Args:
          results: ``(status, item, detail)`` tuples, one per check.
          pass_count: Number of passed checks.
          warn_count: Number of warnings.
          fail_count: Number of failed checks; the summary ``ok`` flag is
              true only when this is zero.
          config_path: Config path to record; when falsy, the ``CONFIG_PATH``
              environment variable or ``config/config.yaml`` is recorded.

      A failure while writing is reported on stdout and not raised.
      """
      recorded_config_path = config_path or os.environ.get("CONFIG_PATH", "config/config.yaml")

      checks = []
      for status, item, detail in results:
          checks.append({"status": status, "item": item, "detail": detail})

      summary = {
          "pass": pass_count,
          "warn": warn_count,
          "fail": fail_count,
          "ok": fail_count == 0,
      }
      report = {
          "version": __version__,
          "generated_at": datetime.now(timezone.utc).isoformat(),
          "config_path": recorded_config_path,
          "summary": summary,
          "checks": checks,
      }

      try:
          target_dir = Path("output", "meta")
          target_dir.mkdir(parents=True, exist_ok=True)
          target = target_dir / "doctor_report.json"
          payload = json.dumps(report, ensure_ascii=False, indent=2)
          target.write_text(payload, encoding="utf-8")
          print(f"体检报告已保存: {target}")
      except Exception as exc:
          # Best effort: saving the report must not abort the doctor run.
          print(f"⚠️ 体检报告保存失败: {exc}")
  def _run_doctor(config_path: Optional[str] = None) -> bool:
      """Run the environment and configuration health check ("doctor").

      Checks, in order: Python version, key files, config loading, and (only
      when the config loaded) schedule, AI, storage and notification
      settings, followed by a write probe of the ``output`` directory. Each
      outcome is printed and collected, a JSON report is saved through
      ``_save_doctor_report`` and a summary is printed.

      Args:
          config_path: Main config file path. When ``None`` the ``CONFIG_PATH``
              environment variable, then ``config/config.yaml``, is used.

      Returns:
          ``True`` when no check ended with status ``"fail"``, else ``False``.
      """
      banner = "=" * 60
      print(banner)
      print(f"TrendRadar v{__version__} 环境体检")
      print(banner)

      results: List[Tuple[str, str, str]] = []

      def record(status: str, item: str, detail: str) -> None:
          # Shorthand so each check does not repeat the accumulator argument.
          _record_doctor_result(results, status, item, detail)

      config = None

      # 1) Python version.
      version_info = sys.version_info
      py_version = f"{version_info.major}.{version_info.minor}.{version_info.micro}"
      if version_info >= (3, 10):
          record("pass", "Python版本", f"{py_version} (满足 >= 3.10)")
      else:
          record("fail", "Python版本", f"{py_version} (不满足 >= 3.10)")

      # 2) Key files: a missing required file fails, a missing optional file warns.
      if config_path is None:
          config_path = os.environ.get("CONFIG_PATH", "config/config.yaml")

      file_checks = (
          (config_path, "主配置文件", True),
          ("config/frequency_words.txt", "关键词文件", True),
          ("config/timeline.yaml", "调度文件", False),
      )
      for path_str, desc, required in file_checks:
          if Path(path_str).exists():
              record("pass", desc, f"已找到: {path_str}")
          elif required:
              record("fail", desc, f"缺失: {path_str}")
          else:
              record("warn", desc, f"未找到: {path_str}(将使用默认调度模板)")

      # 3) Config loading.
      try:
          config = load_config(config_path)
          record("pass", "配置加载", f"加载成功: {config_path}")
      except Exception as exc:
          record("fail", "配置加载", f"加载失败: {exc}")

      # The remaining configuration checks need a loaded config object.
      if config:
          # 4) Schedule configuration.
          try:
              doctor_ctx = AppContext(config)
              resolved = doctor_ctx.create_scheduler().resolve()
              record(
                  "pass",
                  "调度配置",
                  f"调度解析成功(report_mode={resolved.report_mode}, ai_mode={resolved.ai_mode})",
              )
          except Exception as exc:
              record("fail", "调度配置", f"解析失败: {exc}")

          # 5) AI configuration; severity depends on which feature needs it.
          analysis_on = config.get("AI_ANALYSIS", {}).get("ENABLED", False)
          translation_on = config.get("AI_TRANSLATION", {}).get("ENABLED", False)
          ai_filter_on = config.get("FILTER", {}).get("METHOD", "keyword") == "ai"

          if analysis_on or translation_on or ai_filter_on:
              try:
                  from trendradar.ai.client import AIClient

                  ai_cfg_ok, ai_message = AIClient(config.get("AI", {})).validate_config()
                  if ai_cfg_ok:
                      record("pass", "AI配置", f"模型: {config.get('AI', {}).get('MODEL', '')}")
                  elif analysis_on or translation_on:
                      # AI analysis/translation are hard dependencies.
                      record("fail", "AI配置", ai_message)
                  else:
                      # AI filtering alone falls back to keyword matching.
                      record("warn", "AI配置", f"{ai_message}(AI 筛选将回退关键词模式)")
              except Exception as exc:
                  record("fail", "AI配置", f"校验异常: {exc}")
          else:
              record("warn", "AI配置", "未启用 AI 功能,跳过校验")

          # 6) Storage configuration.
          try:
              storage_cfg = config.get("STORAGE", {})
              backend = storage_cfg.get("BACKEND", "auto")
              remote_cfg = storage_cfg.get("REMOTE", {})
              missing = [
                  key
                  for key in ("BUCKET_NAME", "ACCESS_KEY_ID", "SECRET_ACCESS_KEY", "ENDPOINT_URL")
                  if not remote_cfg.get(key)
              ]
              if missing and backend == "remote":
                  record("fail", "存储配置", f"remote 模式缺少配置: {', '.join(missing)}")
              elif missing and backend == "auto" and os.environ.get("GITHUB_ACTIONS") == "true":
                  record(
                      "warn",
                      "存储配置",
                      "GitHub Actions + auto 模式未完整配置远程存储,可能导致数据丢失",
                  )
              else:
                  storage = AppContext(config).get_storage_manager()
                  record("pass", "存储配置", f"当前后端: {storage.backend_name}")
          except Exception as exc:
              record("fail", "存储配置", f"检查失败: {exc}")

          # 7) Notification channels.
          usable = []
          problems = []
          max_accounts = config.get("MAX_ACCOUNTS_PER_CHANNEL", 3)

          # Channels configured through a single (possibly multi-account) value.
          simple_channels = {
              "FEISHU_WEBHOOK_URL": "飞书",
              "DINGTALK_WEBHOOK_URL": "钉钉",
              "WEWORK_WEBHOOK_URL": "企业微信",
              "BARK_URL": "Bark",
              "SLACK_WEBHOOK_URL": "Slack",
              "GENERIC_WEBHOOK_URL": "通用Webhook",
          }
          for key, label in simple_channels.items():
              accounts = parse_multi_account_config(config.get(key, ""))
              if accounts:
                  usable.append(f"{label}({min(len(accounts), max_accounts)}个)")

          # Telegram: bot tokens and chat ids are validated as pairs.
          tg_tokens = parse_multi_account_config(config.get("TELEGRAM_BOT_TOKEN", ""))
          tg_chats = parse_multi_account_config(config.get("TELEGRAM_CHAT_ID", ""))
          if tg_tokens or tg_chats:
              paired_ok, paired_total = validate_paired_configs(
                  {"bot_token": tg_tokens, "chat_id": tg_chats},
                  "Telegram",
                  required_keys=["bot_token", "chat_id"],
              )
              if paired_ok and paired_total > 0:
                  usable.append(f"Telegram({min(paired_total, max_accounts)}个)")
              else:
                  problems.append("Telegram bot_token/chat_id 配置不完整或数量不一致")

          # ntfy: the token is optional, but when given it is paired with the topics.
          ntfy_server = config.get("NTFY_SERVER_URL", "")
          ntfy_topics = parse_multi_account_config(config.get("NTFY_TOPIC", ""))
          ntfy_tokens = parse_multi_account_config(config.get("NTFY_TOKEN", ""))
          if ntfy_server and ntfy_topics:
              if not ntfy_tokens:
                  usable.append(f"ntfy({min(len(ntfy_topics), max_accounts)}个)")
              else:
                  paired_ok, paired_total = validate_paired_configs(
                      {"topic": ntfy_topics, "token": ntfy_tokens},
                      "ntfy",
                  )
                  if paired_ok and paired_total > 0:
                      usable.append(f"ntfy({min(paired_total, max_accounts)}个)")
                  else:
                      problems.append("ntfy topic/token 数量不一致")

          # Email needs sender, password and recipient together.
          email_fields = [
              config.get("EMAIL_FROM"),
              config.get("EMAIL_PASSWORD"),
              config.get("EMAIL_TO"),
          ]
          if all(email_fields):
              usable.append("邮件")
          elif any(email_fields):
              problems.append("邮件配置不完整(需要 from/password/to 同时配置)")

          if problems:
              if usable:
                  record(
                      "warn",
                      "通知配置",
                      f"可用渠道: {', '.join(usable)};问题: {';'.join(problems)}",
                  )
              else:
                  record("fail", "通知配置", ";".join(problems))
          elif usable:
              record("pass", "通知配置", f"可用渠道: {', '.join(usable)}")
          else:
              record("warn", "通知配置", "未配置任何通知渠道")

      # 8) Output directory must be writable: write and remove a probe file.
      try:
          out_dir = Path("output")
          out_dir.mkdir(parents=True, exist_ok=True)
          probe = out_dir / ".doctor_write_probe"
          probe.write_text("ok", encoding="utf-8")
          probe.unlink(missing_ok=True)
          record("pass", "输出目录", f"可写: {out_dir}")
      except Exception as exc:
          record("fail", "输出目录", f"不可写: {exc}")

      # Tally the outcomes in a single pass.
      tally = {"pass": 0, "warn": 0, "fail": 0}
      for status, _item, _detail in results:
          if status in tally:
              tally[status] += 1
      pass_count = tally["pass"]
      warn_count = tally["warn"]
      fail_count = tally["fail"]

      _save_doctor_report(results, pass_count, warn_count, fail_count, config_path)

      print("-" * 60)
      print(f"体检结果: ✅ {pass_count} 项通过 ⚠️ {warn_count} 项警告 ❌ {fail_count} 项失败")
      print(banner)

      if fail_count:
          print("体检未通过,请先修复失败项。")
          return False
      print("体检通过。")
      return True
  def _build_test_report_data(ctx: AppContext) -> Dict:
      """Build the synthetic report payload used by the notification test.

      The payload holds a single keyword group with one title stamped with
      the current time from ``ctx.get_time()``; ``failed_ids``,
      ``new_titles`` and ``id_to_name`` are empty.
      """
      now = ctx.get_time()
      stamp = now.strftime('%Y-%m-%d %H:%M:%S')

      probe_title = {
          "title": f"TrendRadar 通知测试消息({stamp})",
          "source_name": "TrendRadar",
          "url": "https://github.com/sansan0/TrendRadar",
          "mobile_url": "",
          "ranks": [1],
          "rank_threshold": ctx.rank_threshold,
          "count": 1,
          "is_new": True,
          "time_display": now.strftime("%H:%M"),
          "matched_keyword": "连通性测试",
      }
      probe_group = {
          "word": "连通性测试",
          "count": 1,
          "titles": [probe_title],
      }

      report = {}
      report["stats"] = [probe_group]
      report["failed_ids"] = []
      report["new_titles"] = []
      report["id_to_name"] = {}
      return report
  def _create_test_html_file(ctx: AppContext) -> Optional[str]:
      """Write a small HTML page used as the body of the e-mail test.

      The file is created as
      ``output/html/<ctx.format_date()>/notification_test_<ctx.format_time()>.html``.

      Returns:
          The path of the written file as a string, or ``None`` when anything
          fails (the error is printed, not raised).
      """
      try:
          now = ctx.get_time()
          target_dir = Path("output", "html", ctx.format_date())
          target_dir.mkdir(parents=True, exist_ok=True)
          target = target_dir / f"notification_test_{ctx.format_time()}.html"

          timestamp = now.strftime('%Y-%m-%d %H:%M:%S')
          markup_lines = [
              "<!DOCTYPE html>",
              '<html lang="zh-CN">',
              '<head><meta charset="UTF-8"><title>TrendRadar 通知测试</title></head>',
              "<body>",
              "<h2>TrendRadar 通知连通性测试</h2>",
              f"<p>测试时间:{timestamp} ({ctx.timezone})</p>",
              "<p>这是一条测试消息,用于验证邮件渠道是否可达。</p>",
              "</body>",
              "</html>",
          ]
          target.write_text("\n".join(markup_lines), encoding="utf-8")
          return str(target)
      except Exception as exc:
          print(f"[测试通知] 创建测试 HTML 失败: {exc}")
          return None
  def _run_test_notification(config: Dict) -> bool:
      """Send a test notification through the configured channels.

      Args:
          config: Loaded application configuration. It is not modified; the
              test works on a deep copy.

      Returns:
          ``True`` when at least one channel reports success. ``False`` when
          no channel is configured, the dispatcher returns no results, or
          every channel fails.
      """
      from trendradar.notification import NotificationDispatcher

      ctx = AppContext(config)
      try:
          # Is any channel configured at all?
          channel_ready = (
              config.get("FEISHU_WEBHOOK_URL"),
              config.get("DINGTALK_WEBHOOK_URL"),
              config.get("WEWORK_WEBHOOK_URL"),
              (config.get("TELEGRAM_BOT_TOKEN") and config.get("TELEGRAM_CHAT_ID")),
              (config.get("EMAIL_FROM") and config.get("EMAIL_PASSWORD") and config.get("EMAIL_TO")),
              (config.get("NTFY_SERVER_URL") and config.get("NTFY_TOPIC")),
              config.get("BARK_URL"),
              config.get("SLACK_WEBHOOK_URL"),
              config.get("GENERIC_WEBHOOK_URL"),
          )
          if not any(channel_ready):
              print("未检测到可用通知渠道,请先在 config.yaml 或环境变量中配置。")
              return False

          # Pin the display regions so the test message is not empty when the
          # user has switched HOTLIST off.
          test_config = copy.deepcopy(config)
          regions = test_config.setdefault("DISPLAY", {}).setdefault("REGIONS", {})
          regions["HOTLIST"] = True
          for region_name in ("NEW_ITEMS", "RSS", "STANDALONE", "AI_ANALYSIS"):
              regions[region_name] = False

          # Translation is switched off to avoid extra AI calls during the test.
          if "AI_TRANSLATION" in test_config:
              test_config["AI_TRANSLATION"]["ENABLED"] = False

          proxy_url = None
          if test_config.get("USE_PROXY"):
              proxy_url = test_config.get("DEFAULT_PROXY", "")
          if proxy_url:
              print("[测试通知] 检测到代理配置,将使用代理发送")

          dispatcher = NotificationDispatcher(
              config=test_config,
              get_time_func=ctx.get_time,
              split_content_func=ctx.split_content,
              translator=None,
          )
          report_data = _build_test_report_data(ctx)
          html_file_path = _create_test_html_file(ctx)

          heavy_rule = "=" * 60
          light_rule = "-" * 60
          print(heavy_rule)
          print("通知连通性测试")
          print(heavy_rule)

          results = dispatcher.dispatch_all(
              report_data=report_data,
              report_type="通知连通性测试",
              proxy_url=proxy_url,
              mode="daily",
              html_file_path=html_file_path,
          )
          if not results:
              print("没有可测试的有效通知渠道(可能配置不完整)。")
              return False

          print(light_rule)
          success_count = 0
          for channel, ok in results.items():
              if not ok:
                  print(f"❌ {channel}: 测试失败")
                  continue
              success_count += 1
              print(f"✅ {channel}: 测试成功")
          print(light_rule)
          print(f"测试结果: {success_count}/{len(results)} 个渠道成功")
          return success_count > 0
      finally:
          ctx.cleanup()
  def main():
      """Command-line entry point.

      Parses the CLI flags and dispatches:

      * ``--doctor``: run the health check (no config load needed up front).
      * ``--show-schedule``: print the current schedule status.
      * ``--test-notification``: send a test notification.
      * no flag: optional version check, then a normal ``NewsAnalyzer`` run.

      Raises:
          SystemExit: With code 1 when ``--doctor`` or ``--test-notification``
              reports failure.

      A ``FileNotFoundError`` prints a hint about the expected config files.
      Any other ``Exception`` is printed and re-raised only when the
      ``DEBUG`` config flag is truthy.
      """
      # The help epilog is assembled from explicit lines.
      epilog_text = (
          "\n"
          "调度状态命令:\n"
          "--show-schedule 显示当前调度状态(时间段、行为开关)\n"
          "诊断命令:\n"
          "--doctor 运行环境与配置体检\n"
          "--test-notification 发送测试通知到已配置渠道\n"
          "示例:\n"
          "python -m trendradar # 正常运行\n"
          "python -m trendradar --show-schedule # 查看当前调度状态\n"
          "python -m trendradar --doctor # 运行一键体检\n"
          "python -m trendradar --test-notification # 测试通知渠道连通性\n"
      )
      parser = argparse.ArgumentParser(
          description="TrendRadar - 热点新闻聚合与分析工具",
          formatter_class=argparse.RawDescriptionHelpFormatter,
          epilog=epilog_text,
      )
      parser.add_argument(
          "--show-schedule",
          action="store_true",
          help="显示当前调度状态",
      )
      parser.add_argument(
          "--doctor",
          action="store_true",
          help="运行环境与配置体检",
      )
      parser.add_argument(
          "--test-notification",
          action="store_true",
          help="发送测试通知到已配置渠道",
      )
      args = parser.parse_args()

      debug_mode = False
      try:
          # The doctor command does not depend on the full run pipeline.
          if args.doctor:
              if not _run_doctor():
                  raise SystemExit(1)
              return

          # Load the configuration first.
          config = load_config()

          # Read the debug flag as soon as the config exists, so that failures
          # in the sub-commands, the version check or analyzer construction
          # are also re-raised in DEBUG mode instead of being swallowed.
          debug_mode = config.get("DEBUG", False)

          # Schedule status command.
          if args.show_schedule:
              _handle_status_commands(config)
              return

          # Notification test command.
          if args.test_notification:
              if not _run_test_notification(config):
                  raise SystemExit(1)
              return

          version_url = config.get("VERSION_CHECK_URL", "")
          configs_version_url = config.get("CONFIGS_VERSION_CHECK_URL", "")

          # Combined version check (program + config files) with a single
          # remote request.
          need_update = False
          remote_version = None
          if version_url:
              need_update, remote_version = check_all_versions(version_url, configs_version_url)

          # Reuse the already-loaded config rather than loading it again.
          analyzer = NewsAnalyzer(config=config)

          # Attach update info from the version fetched above (no second request).
          if analyzer.is_github_actions and need_update and remote_version:
              analyzer.update_info = {
                  "current_version": __version__,
                  "remote_version": remote_version,
              }

          # Re-read the flag from the analyzer's own config before running.
          debug_mode = analyzer.ctx.config.get("DEBUG", False)
          analyzer.run()
      except FileNotFoundError as e:
          print(f"❌ 配置文件错误: {e}")
          print("\n请确保以下文件存在:")
          print(" • config/config.yaml")
          print(" • config/frequency_words.txt")
          print("\n参考项目文档进行正确配置")
      except Exception as e:
          print(f"❌ 程序运行错误: {e}")
          if debug_mode:
              raise
  def _handle_status_commands(config: Dict) -> None:
      """Print the current schedule status for ``--show-schedule``.

      Resolves the schedule through ``ctx.create_scheduler().resolve()`` and
      prints the current time, the day plan and period, the behaviour
      switches and, when a period is active, the run-once state of analysis
      and push. A failure while resolving or printing is reported on stdout
      and not raised.

      Args:
          config: Loaded application configuration used to build the context.

      ``ctx.cleanup()`` runs in a ``finally`` clause, so the context is
      released even when the command is interrupted; this matches ``run``
      and ``_run_test_notification``.
      """
      from trendradar.context import AppContext

      ctx = AppContext(config)
      try:
          print("=" * 60)
          print(f"TrendRadar v{__version__} 调度状态")
          print("=" * 60)

          try:
              scheduler = ctx.create_scheduler()
              schedule = scheduler.resolve()

              now = ctx.get_time()
              date_str = ctx.format_date()

              print(f"\n⏰ 当前时间: {now.strftime('%Y-%m-%d %H:%M:%S')} ({ctx.timezone})")
              print(f"📅 当前日期: {date_str}")

              print("\n📋 调度信息:")
              print(f" 日计划: {schedule.day_plan}")
              if schedule.period_key:
                  print(f" 当前时间段: {schedule.period_name or schedule.period_key} ({schedule.period_key})")
              else:
                  print(" 当前时间段: 无(使用默认配置)")

              print("\n🔧 行为开关:")
              print(f" 采集数据: {'✅ 是' if schedule.collect else '❌ 否'}")
              print(f" AI 分析: {'✅ 是' if schedule.analyze else '❌ 否'}")
              print(f" 推送通知: {'✅ 是' if schedule.push else '❌ 否'}")
              print(f" 报告模式: {schedule.report_mode}")
              print(f" AI 模式: {schedule.ai_mode}")

              # Run-once state is only shown while a period is active.
              if schedule.period_key:
                  print("\n🔁 一次性控制:")
                  if schedule.once_analyze:
                      already_analyzed = scheduler.already_executed(schedule.period_key, "analyze", date_str)
                      print(f" AI 分析: 仅一次 {'(今日已执行 ⚠️)' if already_analyzed else '(今日未执行 ✅)'}")
                  else:
                      print(" AI 分析: 不限次数")
                  if schedule.once_push:
                      already_pushed = scheduler.already_executed(schedule.period_key, "push", date_str)
                      print(f" 推送通知: 仅一次 {'(今日已执行 ⚠️)' if already_pushed else '(今日未执行 ✅)'}")
                  else:
                      print(" 推送通知: 不限次数")
          except Exception as e:
              print(f"\n❌ 获取调度状态失败: {e}")

          print("\n" + "=" * 60)
      finally:
          # Always release the context, including on KeyboardInterrupt.
          ctx.cleanup()
  # Script entry point: call main() only when this module is executed
  # directly, not when it is imported.
  if __name__ == "__main__":
      main()