splitter.py 81 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842
  1. # coding=utf-8
  2. """
  3. 消息分批处理模块
  4. 提供消息内容分批拆分功能,确保消息大小不超过各平台限制
  5. """
  6. from datetime import datetime
  7. from typing import Dict, List, Optional, Callable
  8. from trendradar.report.formatter import format_title_for_platform
  9. from trendradar.report.helpers import format_rank_display
  10. from trendradar.utils.time import DEFAULT_TIMEZONE, format_iso_time_friendly, convert_time_for_display
  11. from trendradar.notification.batch import truncate_at_line_boundary
  12. # === 分批安全辅助函数 ===
  13. def _split_content_by_lines(
  14. content: str, footer: str, max_bytes: int, base_header: str
  15. ) -> List[str]:
  16. """将超长内容按行边界拆分成多个完整批次(每个批次带 footer)
  17. 不会丢弃任何内容,溢出部分自动分配到后续批次。
  18. Args:
  19. content: 正文内容(不含 footer,可能含 base_header)
  20. footer: 尾部内容(更新时间等)
  21. max_bytes: 单批次最大字节数
  22. base_header: 后续批次的头部
  23. Returns:
  24. 完整批次列表(每个元素 = 正文 + footer,大小 ≤ max_bytes)
  25. """
  26. footer_size = len(footer.encode("utf-8"))
  27. result_batches = []
  28. lines = content.split("\n")
  29. current = ""
  30. for line in lines:
  31. candidate = current + line + "\n"
  32. if len(candidate.encode("utf-8")) + footer_size > max_bytes and current.strip():
  33. result_batches.append(current + footer)
  34. current = base_header + line + "\n"
  35. else:
  36. current = candidate
  37. if current.strip():
  38. result_batches.append(current + footer)
  39. return result_batches
  40. def _safe_append_batch(
  41. batches: List[str], content: str, footer: str, max_bytes: int,
  42. base_header: str = ""
  43. ) -> None:
  44. """安全追加批次,超限时按行拆分成多个批次(不丢弃内容)
  45. Args:
  46. batches: 批次列表(原地修改)
  47. content: 正文内容(不含 footer)
  48. footer: 尾部内容(更新时间等)
  49. max_bytes: 最大字节数
  50. base_header: 溢出时后续批次的头部
  51. """
  52. full = content + footer
  53. if len(full.encode("utf-8")) <= max_bytes:
  54. batches.append(full)
  55. return
  56. split_batches = _split_content_by_lines(content, footer, max_bytes, base_header)
  57. if split_batches:
  58. batches.extend(split_batches)
  59. else:
  60. # 极端情况:单行就超限,强制截断
  61. batches.append(truncate_at_line_boundary(full, max_bytes))
  62. def _safe_new_batch(
  63. new_content: str, footer: str, max_bytes: int, base_header: str,
  64. batches: List[str] = None
  65. ) -> str:
  66. """安全创建新批次,超限时将溢出内容拆分到 batches 中,返回最后一段作为 current_batch
  67. Args:
  68. new_content: 新批次完整内容(含 base_header + section_header + ...)
  69. footer: 尾部内容
  70. max_bytes: 最大字节数
  71. base_header: 基础头部
  72. batches: 批次列表,溢出部分追加到此(可选)
  73. Returns:
  74. 可安全继续追加内容的 current_batch(大小 + footer ≤ max_bytes)
  75. """
  76. if len((new_content + footer).encode("utf-8")) <= max_bytes:
  77. return new_content
  78. if batches is None:
  79. # 无法拆分到 batches,退回行边界截断
  80. footer_size = len(footer.encode("utf-8"))
  81. available = max_bytes - footer_size
  82. header_size = len(base_header.encode("utf-8"))
  83. if available <= header_size:
  84. return base_header
  85. return truncate_at_line_boundary(new_content, available)
  86. # 拆分:前面的部分存入 batches,最后一段作为 current_batch 返回
  87. split_batches = _split_content_by_lines(new_content, footer, max_bytes, base_header)
  88. if len(split_batches) <= 1:
  89. # 无法再拆,直接返回(由后续 _safe_append_batch 兜底)
  90. return new_content
  91. # 前 N-1 个批次存入 batches
  92. batches.extend(split_batches[:-1])
  93. # 最后一个批次去掉 footer 作为 current_batch(后续还会追加内容)
  94. last = split_batches[-1]
  95. if last.endswith(footer):
  96. return last[: -len(footer)]
  97. return last
  98. # 默认批次大小配置
  99. DEFAULT_BATCH_SIZES = {
  100. "dingtalk": 20000,
  101. "feishu": 29000,
  102. "ntfy": 3800,
  103. "default": 4000,
  104. }
  105. # 默认区域顺序
  106. DEFAULT_REGION_ORDER = ["hotlist", "rss", "new_items", "standalone", "ai_analysis"]
  107. def split_content_into_batches(
  108. report_data: Dict,
  109. format_type: str,
  110. update_info: Optional[Dict] = None,
  111. max_bytes: Optional[int] = None,
  112. mode: str = "daily",
  113. batch_sizes: Optional[Dict[str, int]] = None,
  114. feishu_separator: str = "---",
  115. region_order: Optional[List[str]] = None,
  116. get_time_func: Optional[Callable[[], datetime]] = None,
  117. rss_items: Optional[list] = None,
  118. rss_new_items: Optional[list] = None,
  119. timezone: str = DEFAULT_TIMEZONE,
  120. display_mode: str = "keyword",
  121. ai_content: Optional[str] = None,
  122. standalone_data: Optional[Dict] = None,
  123. rank_threshold: int = 10,
  124. ai_stats: Optional[Dict] = None,
  125. report_type: str = "热点分析报告",
  126. show_new_section: bool = True,
  127. ) -> List[str]:
  128. """分批处理消息内容,确保词组标题+至少第一条新闻的完整性(支持热榜+RSS合并+AI分析+独立展示区)
  129. 热榜统计与RSS统计并列显示,热榜新增与RSS新增并列显示。
  130. region_order 控制各区域的显示顺序。
  131. AI分析内容根据 region_order 中的位置显示。
  132. 独立展示区根据 region_order 中的位置显示。
  133. Args:
  134. report_data: 报告数据字典,包含 stats, new_titles, failed_ids, total_new_count
  135. format_type: 格式类型 (feishu, dingtalk, wework, telegram, ntfy, bark, slack)
  136. update_info: 版本更新信息(可选)
  137. max_bytes: 最大字节数(可选,如果不指定则使用默认配置)
  138. mode: 报告模式 (daily, incremental, current)
  139. batch_sizes: 批次大小配置字典(可选)
  140. feishu_separator: 飞书消息分隔符
  141. region_order: 区域显示顺序列表
  142. get_time_func: 获取当前时间的函数(可选)
  143. rss_items: RSS 统计条目列表(按源分组,用于合并推送)
  144. rss_new_items: RSS 新增条目列表(可选,用于新增区块)
  145. timezone: 时区名称(用于 RSS 时间格式化)
  146. display_mode: 显示模式 (keyword=按关键词分组, platform=按平台分组)
  147. ai_content: AI 分析内容(已渲染的字符串,可选)
  148. standalone_data: 独立展示区数据(可选),包含 platforms 和 rss_feeds 列表
  149. ai_stats: AI 分析统计数据(可选),包含 total_news, analyzed_news, max_news_limit 等
  150. Returns:
  151. 分批后的消息内容列表
  152. """
  153. if region_order is None:
  154. region_order = DEFAULT_REGION_ORDER
  155. # 合并批次大小配置
  156. sizes = {**DEFAULT_BATCH_SIZES, **(batch_sizes or {})}
  157. if max_bytes is None:
  158. if format_type == "dingtalk":
  159. max_bytes = sizes.get("dingtalk", 20000)
  160. elif format_type == "feishu":
  161. max_bytes = sizes.get("feishu", 29000)
  162. elif format_type == "ntfy":
  163. max_bytes = sizes.get("ntfy", 3800)
  164. else:
  165. max_bytes = sizes.get("default", 4000)
  166. batches = []
  167. total_hotlist_count = sum(
  168. len(stat["titles"]) for stat in report_data["stats"] if stat["count"] > 0
  169. )
  170. total_titles = total_hotlist_count
  171. # 累加 RSS 条目数
  172. if rss_items:
  173. total_titles += sum(stat.get("count", 0) for stat in rss_items)
  174. now = get_time_func() if get_time_func else datetime.now()
  175. # 构建头部信息
  176. base_header = ""
  177. # 准备 AI 分析统计行(如果存在)
  178. ai_stats_line = ""
  179. if ai_stats and ai_stats.get("analyzed_news", 0) > 0:
  180. analyzed_news = ai_stats.get("analyzed_news", 0)
  181. total_news = ai_stats.get("total_news", 0)
  182. ai_mode = ai_stats.get("ai_mode", "")
  183. # 构建分析数显示:如果被截断则显示 "实际分析数/总可分析数"
  184. if total_news > analyzed_news:
  185. news_display = f"{analyzed_news}/{total_news}"
  186. else:
  187. news_display = str(analyzed_news)
  188. # 如果 AI 模式与推送模式不同,显示模式标识
  189. mode_suffix = ""
  190. if ai_mode and ai_mode != mode:
  191. mode_map = {
  192. "daily": "全天汇总",
  193. "current": "当前榜单",
  194. "incremental": "增量分析"
  195. }
  196. mode_label = mode_map.get(ai_mode, ai_mode)
  197. mode_suffix = f" ({mode_label})"
  198. if format_type in ("wework", "bark", "ntfy", "feishu", "dingtalk"):
  199. ai_stats_line = f"**AI 分析数:** {news_display}{mode_suffix}\n"
  200. elif format_type == "slack":
  201. ai_stats_line = f"*AI 分析数:* {news_display}{mode_suffix}\n"
  202. elif format_type == "telegram":
  203. ai_stats_line = f"AI 分析数: {news_display}{mode_suffix}\n"
  204. # 构建统一的头部(总是显示总新闻数、时间和类型)
  205. if format_type in ("wework", "bark"):
  206. base_header = f"**总新闻数:** {total_titles}\n"
  207. base_header += ai_stats_line
  208. base_header += f"**时间:** {now.strftime('%Y-%m-%d %H:%M:%S')}\n"
  209. base_header += f"**类型:** {report_type}\n\n"
  210. elif format_type == "telegram":
  211. base_header = f"总新闻数: {total_titles}\n"
  212. base_header += ai_stats_line
  213. base_header += f"时间: {now.strftime('%Y-%m-%d %H:%M:%S')}\n"
  214. base_header += f"类型: {report_type}\n\n"
  215. elif format_type == "ntfy":
  216. base_header = f"**总新闻数:** {total_titles}\n"
  217. base_header += ai_stats_line
  218. base_header += f"**时间:** {now.strftime('%Y-%m-%d %H:%M:%S')}\n"
  219. base_header += f"**类型:** {report_type}\n\n"
  220. elif format_type == "feishu":
  221. base_header = f"**总新闻数:** {total_titles}\n"
  222. base_header += ai_stats_line
  223. base_header += f"**时间:** {now.strftime('%Y-%m-%d %H:%M:%S')}\n"
  224. base_header += f"**类型:** {report_type}\n\n"
  225. base_header += "---\n\n"
  226. elif format_type == "dingtalk":
  227. base_header = f"**总新闻数:** {total_titles}\n"
  228. base_header += ai_stats_line
  229. base_header += f"**时间:** {now.strftime('%Y-%m-%d %H:%M:%S')}\n"
  230. base_header += f"**类型:** {report_type}\n\n"
  231. base_header += "---\n\n"
  232. elif format_type == "slack":
  233. base_header = f"*总新闻数:* {total_titles}\n"
  234. base_header += ai_stats_line
  235. base_header += f"*时间:* {now.strftime('%Y-%m-%d %H:%M:%S')}\n"
  236. base_header += f"*类型:* {report_type}\n\n"
  237. base_footer = ""
  238. if format_type in ("wework", "bark"):
  239. base_footer = f"\n\n\n> 更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}"
  240. if update_info:
  241. base_footer += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**,当前 **{update_info['current_version']}**"
  242. elif format_type == "telegram":
  243. base_footer = f"\n\n更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}"
  244. if update_info:
  245. base_footer += f"\nTrendRadar 发现新版本 {update_info['remote_version']},当前 {update_info['current_version']}"
  246. elif format_type == "ntfy":
  247. base_footer = f"\n\n> 更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}"
  248. if update_info:
  249. base_footer += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**,当前 **{update_info['current_version']}**"
  250. elif format_type == "feishu":
  251. base_footer = f"\n\n<font color='grey'>更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}</font>"
  252. if update_info:
  253. base_footer += f"\n<font color='grey'>TrendRadar 发现新版本 {update_info['remote_version']},当前 {update_info['current_version']}</font>"
  254. elif format_type == "dingtalk":
  255. base_footer = f"\n\n> 更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}"
  256. if update_info:
  257. base_footer += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**,当前 **{update_info['current_version']}**"
  258. elif format_type == "slack":
  259. base_footer = f"\n\n_更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}_"
  260. if update_info:
  261. base_footer += f"\n_TrendRadar 发现新版本 *{update_info['remote_version']}*,当前 *{update_info['current_version']}_"
  262. # 根据 display_mode 选择统计标题
  263. stats_title = "热点词汇统计" if display_mode == "keyword" else "热点新闻统计"
  264. stats_header = ""
  265. if report_data["stats"]:
  266. if format_type in ("wework", "bark"):
  267. stats_header = f"📊 **{stats_title}** (共 {total_hotlist_count} 条)\n\n"
  268. elif format_type == "telegram":
  269. stats_header = f"📊 {stats_title} (共 {total_hotlist_count} 条)\n\n"
  270. elif format_type == "ntfy":
  271. stats_header = f"📊 **{stats_title}** (共 {total_hotlist_count} 条)\n\n"
  272. elif format_type == "feishu":
  273. stats_header = f"📊 **{stats_title}** (共 {total_hotlist_count} 条)\n\n"
  274. elif format_type == "dingtalk":
  275. stats_header = f"📊 **{stats_title}** (共 {total_hotlist_count} 条)\n\n"
  276. elif format_type == "slack":
  277. stats_header = f"📊 *{stats_title}* (共 {total_hotlist_count} 条)\n\n"
  278. current_batch = base_header
  279. current_batch_has_content = False
  280. # 当没有热榜数据时的处理
  281. # 注意:如果有 ai_content,不应该返回"暂无匹配"消息,而应该继续处理 AI 内容
  282. if (
  283. not report_data["stats"]
  284. and not report_data["new_titles"]
  285. and not report_data["failed_ids"]
  286. and not ai_content # 有 AI 内容时不返回"暂无匹配"
  287. and not rss_items # 有 RSS 内容时也不返回
  288. and not standalone_data # 有独立展示区数据时也不返回
  289. ):
  290. if mode == "incremental":
  291. mode_text = "增量模式下暂无新增匹配的热点词汇"
  292. elif mode == "current":
  293. mode_text = "当前榜单模式下暂无匹配的热点词汇"
  294. else:
  295. mode_text = "暂无匹配的热点词汇"
  296. simple_content = f"📭 {mode_text}\n\n"
  297. final_content = base_header + simple_content + base_footer
  298. batches.append(final_content)
  299. return batches
  300. # 定义处理热点词汇统计的函数
  301. def process_stats_section(current_batch, current_batch_has_content, batches, add_separator=True):
  302. """处理热点词汇统计"""
  303. if not report_data["stats"]:
  304. return current_batch, current_batch_has_content, batches
  305. total_count = len(report_data["stats"])
  306. # 根据 add_separator 决定是否添加前置分割线
  307. actual_stats_header = ""
  308. if add_separator and current_batch_has_content:
  309. # 需要添加分割线
  310. if format_type == "feishu":
  311. actual_stats_header = f"\n{feishu_separator}\n\n{stats_header}"
  312. elif format_type == "dingtalk":
  313. actual_stats_header = f"\n---\n\n{stats_header}"
  314. elif format_type in ("wework", "bark"):
  315. actual_stats_header = f"\n\n\n\n{stats_header}"
  316. else:
  317. actual_stats_header = f"\n\n{stats_header}"
  318. else:
  319. # 不需要分割线(第一个区域)
  320. actual_stats_header = stats_header
  321. # 添加统计标题
  322. test_content = current_batch + actual_stats_header
  323. if (
  324. len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
  325. < max_bytes
  326. ):
  327. current_batch = test_content
  328. current_batch_has_content = True
  329. else:
  330. if current_batch_has_content:
  331. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  332. current_batch = _safe_new_batch(
  333. base_header + stats_header, base_footer, max_bytes, base_header, batches
  334. )
  335. current_batch_has_content = True
  336. # 逐个处理词组(确保词组标题+第一条新闻的原子性)
  337. for i, stat in enumerate(report_data["stats"]):
  338. word = stat["word"]
  339. count = stat["count"]
  340. sequence_display = f"[{i + 1}/{total_count}]"
  341. # 构建词组标题
  342. word_header = ""
  343. if format_type in ("wework", "bark"):
  344. if count >= 10:
  345. word_header = (
  346. f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
  347. )
  348. elif count >= 5:
  349. word_header = (
  350. f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
  351. )
  352. else:
  353. word_header = f"📌 {sequence_display} **{word}** : {count} 条\n\n"
  354. elif format_type == "telegram":
  355. if count >= 10:
  356. word_header = f"🔥 {sequence_display} {word} : {count} 条\n\n"
  357. elif count >= 5:
  358. word_header = f"📈 {sequence_display} {word} : {count} 条\n\n"
  359. else:
  360. word_header = f"📌 {sequence_display} {word} : {count} 条\n\n"
  361. elif format_type == "ntfy":
  362. if count >= 10:
  363. word_header = (
  364. f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
  365. )
  366. elif count >= 5:
  367. word_header = (
  368. f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
  369. )
  370. else:
  371. word_header = f"📌 {sequence_display} **{word}** : {count} 条\n\n"
  372. elif format_type == "feishu":
  373. if count >= 10:
  374. word_header = f"🔥 <font color='grey'>{sequence_display}</font> **{word}** : <font color='red'>{count}</font> 条\n\n"
  375. elif count >= 5:
  376. word_header = f"📈 <font color='grey'>{sequence_display}</font> **{word}** : <font color='orange'>{count}</font> 条\n\n"
  377. else:
  378. word_header = f"📌 <font color='grey'>{sequence_display}</font> **{word}** : {count} 条\n\n"
  379. elif format_type == "dingtalk":
  380. if count >= 10:
  381. word_header = (
  382. f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
  383. )
  384. elif count >= 5:
  385. word_header = (
  386. f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
  387. )
  388. else:
  389. word_header = f"📌 {sequence_display} **{word}** : {count} 条\n\n"
  390. elif format_type == "slack":
  391. if count >= 10:
  392. word_header = (
  393. f"🔥 {sequence_display} *{word}* : *{count}* 条\n\n"
  394. )
  395. elif count >= 5:
  396. word_header = (
  397. f"📈 {sequence_display} *{word}* : *{count}* 条\n\n"
  398. )
  399. else:
  400. word_header = f"📌 {sequence_display} *{word}* : {count} 条\n\n"
  401. # 构建第一条新闻
  402. # display_mode: keyword=显示来源, platform=显示关键词
  403. show_source = display_mode == "keyword"
  404. show_keyword = display_mode == "platform"
  405. first_news_line = ""
  406. if stat["titles"]:
  407. first_title_data = stat["titles"][0]
  408. if format_type in ("wework", "bark"):
  409. formatted_title = format_title_for_platform(
  410. "wework", first_title_data, show_source=show_source, show_keyword=show_keyword
  411. )
  412. elif format_type == "telegram":
  413. formatted_title = format_title_for_platform(
  414. "telegram", first_title_data, show_source=show_source, show_keyword=show_keyword
  415. )
  416. elif format_type == "ntfy":
  417. formatted_title = format_title_for_platform(
  418. "ntfy", first_title_data, show_source=show_source, show_keyword=show_keyword
  419. )
  420. elif format_type == "feishu":
  421. formatted_title = format_title_for_platform(
  422. "feishu", first_title_data, show_source=show_source, show_keyword=show_keyword
  423. )
  424. elif format_type == "dingtalk":
  425. formatted_title = format_title_for_platform(
  426. "dingtalk", first_title_data, show_source=show_source, show_keyword=show_keyword
  427. )
  428. elif format_type == "slack":
  429. formatted_title = format_title_for_platform(
  430. "slack", first_title_data, show_source=show_source, show_keyword=show_keyword
  431. )
  432. else:
  433. formatted_title = f"{first_title_data['title']}"
  434. first_news_line = f" 1. {formatted_title}\n"
  435. if len(stat["titles"]) > 1:
  436. first_news_line += "\n"
  437. # 原子性检查:词组标题+第一条新闻必须一起处理
  438. word_with_first_news = word_header + first_news_line
  439. test_content = current_batch + word_with_first_news
  440. if (
  441. len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
  442. >= max_bytes
  443. ):
  444. if current_batch_has_content:
  445. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  446. current_batch = _safe_new_batch(
  447. base_header + stats_header + word_with_first_news,
  448. base_footer, max_bytes, base_header, batches
  449. )
  450. current_batch_has_content = True
  451. start_index = 1
  452. else:
  453. current_batch = test_content
  454. current_batch_has_content = True
  455. start_index = 1
  456. # 处理剩余新闻条目
  457. for j in range(start_index, len(stat["titles"])):
  458. title_data = stat["titles"][j]
  459. if format_type in ("wework", "bark"):
  460. formatted_title = format_title_for_platform(
  461. "wework", title_data, show_source=show_source, show_keyword=show_keyword
  462. )
  463. elif format_type == "telegram":
  464. formatted_title = format_title_for_platform(
  465. "telegram", title_data, show_source=show_source, show_keyword=show_keyword
  466. )
  467. elif format_type == "ntfy":
  468. formatted_title = format_title_for_platform(
  469. "ntfy", title_data, show_source=show_source, show_keyword=show_keyword
  470. )
  471. elif format_type == "feishu":
  472. formatted_title = format_title_for_platform(
  473. "feishu", title_data, show_source=show_source, show_keyword=show_keyword
  474. )
  475. elif format_type == "dingtalk":
  476. formatted_title = format_title_for_platform(
  477. "dingtalk", title_data, show_source=show_source, show_keyword=show_keyword
  478. )
  479. elif format_type == "slack":
  480. formatted_title = format_title_for_platform(
  481. "slack", title_data, show_source=show_source, show_keyword=show_keyword
  482. )
  483. else:
  484. formatted_title = f"{title_data['title']}"
  485. news_line = f" {j + 1}. {formatted_title}\n"
  486. if j < len(stat["titles"]) - 1:
  487. news_line += "\n"
  488. test_content = current_batch + news_line
  489. if (
  490. len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
  491. >= max_bytes
  492. ):
  493. if current_batch_has_content:
  494. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  495. current_batch = _safe_new_batch(
  496. base_header + stats_header + word_header + news_line,
  497. base_footer, max_bytes, base_header, batches
  498. )
  499. current_batch_has_content = True
  500. else:
  501. current_batch = test_content
  502. current_batch_has_content = True
  503. # 词组间分隔符
  504. if i < len(report_data["stats"]) - 1:
  505. separator = ""
  506. if format_type in ("wework", "bark"):
  507. separator = f"\n\n\n\n"
  508. elif format_type == "telegram":
  509. separator = f"\n\n"
  510. elif format_type == "ntfy":
  511. separator = f"\n\n"
  512. elif format_type == "feishu":
  513. separator = f"\n{feishu_separator}\n\n"
  514. elif format_type == "dingtalk":
  515. separator = f"\n---\n\n"
  516. elif format_type == "slack":
  517. separator = f"\n\n"
  518. test_content = current_batch + separator
  519. if (
  520. len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
  521. < max_bytes
  522. ):
  523. current_batch = test_content
  524. return current_batch, current_batch_has_content, batches
  525. # 定义处理新增新闻的函数
  526. def process_new_titles_section(current_batch, current_batch_has_content, batches, add_separator=True):
  527. """处理新增新闻"""
  528. if not show_new_section or not report_data["new_titles"]:
  529. return current_batch, current_batch_has_content, batches
  530. # 根据 add_separator 决定是否添加前置分割线
  531. new_header = ""
  532. if add_separator and current_batch_has_content:
  533. # 需要添加分割线
  534. if format_type in ("wework", "bark"):
  535. new_header = f"\n\n\n\n🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
  536. elif format_type == "telegram":
  537. new_header = (
  538. f"\n\n🆕 本次新增热点新闻 (共 {report_data['total_new_count']} 条)\n\n"
  539. )
  540. elif format_type == "ntfy":
  541. new_header = f"\n\n🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
  542. elif format_type == "feishu":
  543. new_header = f"\n{feishu_separator}\n\n🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
  544. elif format_type == "dingtalk":
  545. new_header = f"\n---\n\n🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
  546. elif format_type == "slack":
  547. new_header = f"\n\n🆕 *本次新增热点新闻* (共 {report_data['total_new_count']} 条)\n\n"
  548. else:
  549. # 不需要分割线(第一个区域)
  550. if format_type in ("wework", "bark"):
  551. new_header = f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
  552. elif format_type == "telegram":
  553. new_header = f"🆕 本次新增热点新闻 (共 {report_data['total_new_count']} 条)\n\n"
  554. elif format_type == "ntfy":
  555. new_header = f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
  556. elif format_type == "feishu":
  557. new_header = f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
  558. elif format_type == "dingtalk":
  559. new_header = f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
  560. elif format_type == "slack":
  561. new_header = f"🆕 *本次新增热点新闻* (共 {report_data['total_new_count']} 条)\n\n"
  562. test_content = current_batch + new_header
  563. if (
  564. len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
  565. >= max_bytes
  566. ):
  567. if current_batch_has_content:
  568. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  569. current_batch = _safe_new_batch(
  570. base_header + new_header, base_footer, max_bytes, base_header, batches
  571. )
  572. current_batch_has_content = True
  573. else:
  574. current_batch = test_content
  575. current_batch_has_content = True
  576. # 逐个处理新增新闻来源
  577. for source_data in report_data["new_titles"]:
  578. source_header = ""
  579. if format_type in ("wework", "bark"):
  580. source_header = f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n"
  581. elif format_type == "telegram":
  582. source_header = f"{source_data['source_name']} ({len(source_data['titles'])} 条):\n\n"
  583. elif format_type == "ntfy":
  584. source_header = f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n"
  585. elif format_type == "feishu":
  586. source_header = f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n"
  587. elif format_type == "dingtalk":
  588. source_header = f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n"
  589. elif format_type == "slack":
  590. source_header = f"*{source_data['source_name']}* ({len(source_data['titles'])} 条):\n\n"
  591. # 构建第一条新增新闻
  592. first_news_line = ""
  593. if source_data["titles"]:
  594. first_title_data = source_data["titles"][0]
  595. title_data_copy = first_title_data.copy()
  596. title_data_copy["is_new"] = False
  597. if format_type in ("wework", "bark"):
  598. formatted_title = format_title_for_platform(
  599. "wework", title_data_copy, show_source=False
  600. )
  601. elif format_type == "telegram":
  602. formatted_title = format_title_for_platform(
  603. "telegram", title_data_copy, show_source=False
  604. )
  605. elif format_type == "feishu":
  606. formatted_title = format_title_for_platform(
  607. "feishu", title_data_copy, show_source=False
  608. )
  609. elif format_type == "dingtalk":
  610. formatted_title = format_title_for_platform(
  611. "dingtalk", title_data_copy, show_source=False
  612. )
  613. elif format_type == "slack":
  614. formatted_title = format_title_for_platform(
  615. "slack", title_data_copy, show_source=False
  616. )
  617. else:
  618. formatted_title = f"{title_data_copy['title']}"
  619. first_news_line = f" 1. {formatted_title}\n"
  620. # 原子性检查:来源标题+第一条新闻
  621. source_with_first_news = source_header + first_news_line
  622. test_content = current_batch + source_with_first_news
  623. if (
  624. len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
  625. >= max_bytes
  626. ):
  627. if current_batch_has_content:
  628. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  629. current_batch = _safe_new_batch(
  630. base_header + new_header + source_with_first_news,
  631. base_footer, max_bytes, base_header, batches
  632. )
  633. current_batch_has_content = True
  634. start_index = 1
  635. else:
  636. current_batch = test_content
  637. current_batch_has_content = True
  638. start_index = 1
  639. # 处理剩余新增新闻
  640. for j in range(start_index, len(source_data["titles"])):
  641. title_data = source_data["titles"][j]
  642. title_data_copy = title_data.copy()
  643. title_data_copy["is_new"] = False
  644. if format_type == "wework":
  645. formatted_title = format_title_for_platform(
  646. "wework", title_data_copy, show_source=False
  647. )
  648. elif format_type == "telegram":
  649. formatted_title = format_title_for_platform(
  650. "telegram", title_data_copy, show_source=False
  651. )
  652. elif format_type == "feishu":
  653. formatted_title = format_title_for_platform(
  654. "feishu", title_data_copy, show_source=False
  655. )
  656. elif format_type == "dingtalk":
  657. formatted_title = format_title_for_platform(
  658. "dingtalk", title_data_copy, show_source=False
  659. )
  660. elif format_type == "slack":
  661. formatted_title = format_title_for_platform(
  662. "slack", title_data_copy, show_source=False
  663. )
  664. else:
  665. formatted_title = f"{title_data_copy['title']}"
  666. news_line = f" {j + 1}. {formatted_title}\n"
  667. test_content = current_batch + news_line
  668. if (
  669. len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
  670. >= max_bytes
  671. ):
  672. if current_batch_has_content:
  673. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  674. current_batch = _safe_new_batch(
  675. base_header + new_header + source_header + news_line,
  676. base_footer, max_bytes, base_header, batches
  677. )
  678. current_batch_has_content = True
  679. else:
  680. current_batch = test_content
  681. current_batch_has_content = True
  682. current_batch += "\n"
  683. return current_batch, current_batch_has_content, batches
  684. # 定义处理 AI 分析的函数
  685. def process_ai_section(current_batch, current_batch_has_content, batches, add_separator=True):
  686. """处理 AI 分析内容"""
  687. nonlocal ai_content
  688. if not ai_content:
  689. return current_batch, current_batch_has_content, batches
  690. # 根据 add_separator 决定是否添加前置分割线
  691. ai_separator = ""
  692. if add_separator and current_batch_has_content:
  693. # 需要添加分割线
  694. if format_type == "feishu":
  695. ai_separator = f"\n{feishu_separator}\n\n"
  696. elif format_type == "dingtalk":
  697. ai_separator = "\n---\n\n"
  698. elif format_type in ("wework", "bark"):
  699. ai_separator = "\n\n\n\n"
  700. elif format_type in ("telegram", "ntfy", "slack"):
  701. ai_separator = "\n\n"
  702. # 如果不需要分割线,ai_separator 保持为空字符串
  703. # 尝试将 AI 内容添加到当前批次
  704. test_content = current_batch + ai_separator + ai_content
  705. if (
  706. len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
  707. < max_bytes
  708. ):
  709. current_batch = test_content
  710. current_batch_has_content = True
  711. else:
  712. if current_batch_has_content:
  713. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  714. # AI 内容可能很长,按行拆分成多个批次
  715. footer_size = len(base_footer.encode("utf-8"))
  716. header_size = len(base_header.encode("utf-8"))
  717. available = max_bytes - footer_size - header_size
  718. ai_lines = ai_content.split("\n")
  719. current_batch = base_header
  720. current_batch_has_content = False
  721. for line in ai_lines:
  722. test_line = line + "\n" if not line.endswith("\n") else line
  723. test_content = current_batch + test_line
  724. if len(test_content.encode("utf-8")) + footer_size >= max_bytes and current_batch_has_content:
  725. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  726. current_batch = base_header + test_line
  727. else:
  728. current_batch = test_content
  729. current_batch_has_content = True
  730. return current_batch, current_batch_has_content, batches
  731. # 定义处理独立展示区的函数
  732. def process_standalone_section_wrapper(current_batch, current_batch_has_content, batches, add_separator=True):
  733. """处理独立展示区"""
  734. if not standalone_data:
  735. return current_batch, current_batch_has_content, batches
  736. return _process_standalone_section(
  737. standalone_data, format_type, feishu_separator, base_header, base_footer,
  738. max_bytes, current_batch, current_batch_has_content, batches, timezone,
  739. rank_threshold, add_separator
  740. )
  741. # 定义处理 RSS 统计的函数
  742. def process_rss_stats_wrapper(current_batch, current_batch_has_content, batches, add_separator=True):
  743. """处理 RSS 统计"""
  744. if not rss_items:
  745. return current_batch, current_batch_has_content, batches
  746. return _process_rss_stats_section(
  747. rss_items, format_type, feishu_separator, base_header, base_footer,
  748. max_bytes, current_batch, current_batch_has_content, batches, timezone,
  749. add_separator
  750. )
  751. # 定义处理 RSS 新增的函数
  752. def process_rss_new_wrapper(current_batch, current_batch_has_content, batches, add_separator=True):
  753. """处理 RSS 新增"""
  754. if not rss_new_items:
  755. return current_batch, current_batch_has_content, batches
  756. return _process_rss_new_titles_section(
  757. rss_new_items, format_type, feishu_separator, base_header, base_footer,
  758. max_bytes, current_batch, current_batch_has_content, batches, timezone,
  759. add_separator
  760. )
  761. # 按 region_order 顺序处理各区域
  762. # 记录是否已有区域内容(用于决定是否添加分割线)
  763. has_region_content = False
  764. for region in region_order:
  765. # 记录处理前的状态,用于判断该区域是否产生了内容
  766. batch_before = current_batch
  767. has_content_before = current_batch_has_content
  768. batches_len_before = len(batches)
  769. # 决定是否需要添加分割线(第一个有内容的区域不需要)
  770. add_separator = has_region_content
  771. if region == "hotlist":
  772. # 处理热榜统计
  773. current_batch, current_batch_has_content, batches = process_stats_section(
  774. current_batch, current_batch_has_content, batches, add_separator
  775. )
  776. elif region == "rss":
  777. # 处理 RSS 统计
  778. current_batch, current_batch_has_content, batches = process_rss_stats_wrapper(
  779. current_batch, current_batch_has_content, batches, add_separator
  780. )
  781. elif region == "new_items":
  782. # 处理热榜新增
  783. current_batch, current_batch_has_content, batches = process_new_titles_section(
  784. current_batch, current_batch_has_content, batches, add_separator
  785. )
  786. # 处理 RSS 新增(跟随 new_items,继承 add_separator 逻辑)
  787. # 如果热榜新增产生了内容,RSS 新增需要分割线
  788. new_batch_changed = (
  789. current_batch != batch_before or
  790. current_batch_has_content != has_content_before or
  791. len(batches) != batches_len_before
  792. )
  793. rss_new_separator = new_batch_changed or has_region_content
  794. current_batch, current_batch_has_content, batches = process_rss_new_wrapper(
  795. current_batch, current_batch_has_content, batches, rss_new_separator
  796. )
  797. elif region == "standalone":
  798. # 处理独立展示区
  799. current_batch, current_batch_has_content, batches = process_standalone_section_wrapper(
  800. current_batch, current_batch_has_content, batches, add_separator
  801. )
  802. elif region == "ai_analysis":
  803. # 处理 AI 分析
  804. current_batch, current_batch_has_content, batches = process_ai_section(
  805. current_batch, current_batch_has_content, batches, add_separator
  806. )
  807. # 检查该区域是否产生了内容
  808. region_produced_content = (
  809. current_batch != batch_before or
  810. current_batch_has_content != has_content_before or
  811. len(batches) != batches_len_before
  812. )
  813. if region_produced_content:
  814. has_region_content = True
  815. if report_data["failed_ids"]:
  816. failed_header = ""
  817. if format_type == "wework":
  818. failed_header = f"\n\n\n\n⚠️ **数据获取失败的平台:**\n\n"
  819. elif format_type == "telegram":
  820. failed_header = f"\n\n⚠️ 数据获取失败的平台:\n\n"
  821. elif format_type == "ntfy":
  822. failed_header = f"\n\n⚠️ **数据获取失败的平台:**\n\n"
  823. elif format_type == "feishu":
  824. failed_header = f"\n{feishu_separator}\n\n⚠️ **数据获取失败的平台:**\n\n"
  825. elif format_type == "dingtalk":
  826. failed_header = f"\n---\n\n⚠️ **数据获取失败的平台:**\n\n"
  827. test_content = current_batch + failed_header
  828. if (
  829. len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
  830. >= max_bytes
  831. ):
  832. if current_batch_has_content:
  833. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  834. current_batch = _safe_new_batch(
  835. base_header + failed_header, base_footer, max_bytes, base_header, batches
  836. )
  837. current_batch_has_content = True
  838. else:
  839. current_batch = test_content
  840. current_batch_has_content = True
  841. for i, id_value in enumerate(report_data["failed_ids"], 1):
  842. if format_type == "feishu":
  843. failed_line = f" • <font color='red'>{id_value}</font>\n"
  844. elif format_type == "dingtalk":
  845. failed_line = f" • **{id_value}**\n"
  846. else:
  847. failed_line = f" • {id_value}\n"
  848. test_content = current_batch + failed_line
  849. if (
  850. len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
  851. >= max_bytes
  852. ):
  853. if current_batch_has_content:
  854. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  855. current_batch = _safe_new_batch(
  856. base_header + failed_header + failed_line,
  857. base_footer, max_bytes, base_header, batches
  858. )
  859. current_batch_has_content = True
  860. else:
  861. current_batch = test_content
  862. current_batch_has_content = True
  863. # 完成最后批次
  864. if current_batch_has_content:
  865. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  866. return batches
  867. def _process_rss_stats_section(
  868. rss_stats: list,
  869. format_type: str,
  870. feishu_separator: str,
  871. base_header: str,
  872. base_footer: str,
  873. max_bytes: int,
  874. current_batch: str,
  875. current_batch_has_content: bool,
  876. batches: List[str],
  877. timezone: str = DEFAULT_TIMEZONE,
  878. add_separator: bool = True,
  879. ) -> tuple:
  880. """处理 RSS 统计区块(按关键词分组,与热榜统计格式一致)
  881. Args:
  882. rss_stats: RSS 关键词统计列表,格式与热榜 stats 一致:
  883. [{"word": "AI", "count": 5, "titles": [...]}]
  884. format_type: 格式类型
  885. feishu_separator: 飞书分隔符
  886. base_header: 基础头部
  887. base_footer: 基础尾部
  888. max_bytes: 最大字节数
  889. current_batch: 当前批次内容
  890. current_batch_has_content: 当前批次是否有内容
  891. batches: 已完成的批次列表
  892. timezone: 时区名称
  893. add_separator: 是否在区块前添加分割线(第一个区域时为 False)
  894. Returns:
  895. (current_batch, current_batch_has_content, batches) 元组
  896. """
  897. if not rss_stats:
  898. return current_batch, current_batch_has_content, batches
  899. # 计算总条目数
  900. total_items = sum(stat["count"] for stat in rss_stats)
  901. total_keywords = len(rss_stats)
  902. # RSS 统计区块标题(根据 add_separator 决定是否添加前置分割线)
  903. rss_header = ""
  904. if add_separator and current_batch_has_content:
  905. # 需要添加分割线
  906. if format_type == "feishu":
  907. rss_header = f"\n{feishu_separator}\n\n📰 **RSS 订阅统计** (共 {total_items} 条)\n\n"
  908. elif format_type == "dingtalk":
  909. rss_header = f"\n---\n\n📰 **RSS 订阅统计** (共 {total_items} 条)\n\n"
  910. elif format_type in ("wework", "bark"):
  911. rss_header = f"\n\n\n\n📰 **RSS 订阅统计** (共 {total_items} 条)\n\n"
  912. elif format_type == "telegram":
  913. rss_header = f"\n\n📰 RSS 订阅统计 (共 {total_items} 条)\n\n"
  914. elif format_type == "slack":
  915. rss_header = f"\n\n📰 *RSS 订阅统计* (共 {total_items} 条)\n\n"
  916. else:
  917. rss_header = f"\n\n📰 **RSS 订阅统计** (共 {total_items} 条)\n\n"
  918. else:
  919. # 不需要分割线(第一个区域)
  920. if format_type == "feishu":
  921. rss_header = f"📰 **RSS 订阅统计** (共 {total_items} 条)\n\n"
  922. elif format_type == "dingtalk":
  923. rss_header = f"📰 **RSS 订阅统计** (共 {total_items} 条)\n\n"
  924. elif format_type == "telegram":
  925. rss_header = f"📰 RSS 订阅统计 (共 {total_items} 条)\n\n"
  926. elif format_type == "slack":
  927. rss_header = f"📰 *RSS 订阅统计* (共 {total_items} 条)\n\n"
  928. else:
  929. rss_header = f"📰 **RSS 订阅统计** (共 {total_items} 条)\n\n"
  930. # 添加 RSS 标题
  931. test_content = current_batch + rss_header
  932. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) < max_bytes:
  933. current_batch = test_content
  934. current_batch_has_content = True
  935. else:
  936. if current_batch_has_content:
  937. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  938. current_batch = _safe_new_batch(
  939. base_header + rss_header, base_footer, max_bytes, base_header, batches
  940. )
  941. current_batch_has_content = True
  942. # 逐个处理关键词组(与热榜一致)
  943. for i, stat in enumerate(rss_stats):
  944. word = stat["word"]
  945. count = stat["count"]
  946. sequence_display = f"[{i + 1}/{total_keywords}]"
  947. # 构建关键词标题(与热榜格式一致)
  948. word_header = ""
  949. if format_type in ("wework", "bark"):
  950. if count >= 10:
  951. word_header = f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
  952. elif count >= 5:
  953. word_header = f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
  954. else:
  955. word_header = f"📌 {sequence_display} **{word}** : {count} 条\n\n"
  956. elif format_type == "telegram":
  957. if count >= 10:
  958. word_header = f"🔥 {sequence_display} {word} : {count} 条\n\n"
  959. elif count >= 5:
  960. word_header = f"📈 {sequence_display} {word} : {count} 条\n\n"
  961. else:
  962. word_header = f"📌 {sequence_display} {word} : {count} 条\n\n"
  963. elif format_type == "ntfy":
  964. if count >= 10:
  965. word_header = f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
  966. elif count >= 5:
  967. word_header = f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
  968. else:
  969. word_header = f"📌 {sequence_display} **{word}** : {count} 条\n\n"
  970. elif format_type == "feishu":
  971. if count >= 10:
  972. word_header = f"🔥 <font color='grey'>{sequence_display}</font> **{word}** : <font color='red'>{count}</font> 条\n\n"
  973. elif count >= 5:
  974. word_header = f"📈 <font color='grey'>{sequence_display}</font> **{word}** : <font color='orange'>{count}</font> 条\n\n"
  975. else:
  976. word_header = f"📌 <font color='grey'>{sequence_display}</font> **{word}** : {count} 条\n\n"
  977. elif format_type == "dingtalk":
  978. if count >= 10:
  979. word_header = f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
  980. elif count >= 5:
  981. word_header = f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
  982. else:
  983. word_header = f"📌 {sequence_display} **{word}** : {count} 条\n\n"
  984. elif format_type == "slack":
  985. if count >= 10:
  986. word_header = f"🔥 {sequence_display} *{word}* : *{count}* 条\n\n"
  987. elif count >= 5:
  988. word_header = f"📈 {sequence_display} *{word}* : *{count}* 条\n\n"
  989. else:
  990. word_header = f"📌 {sequence_display} *{word}* : {count} 条\n\n"
  991. # 构建第一条新闻(使用 format_title_for_platform)
  992. first_news_line = ""
  993. if stat["titles"]:
  994. first_title_data = stat["titles"][0]
  995. if format_type in ("wework", "bark"):
  996. formatted_title = format_title_for_platform("wework", first_title_data, show_source=True)
  997. elif format_type == "telegram":
  998. formatted_title = format_title_for_platform("telegram", first_title_data, show_source=True)
  999. elif format_type == "ntfy":
  1000. formatted_title = format_title_for_platform("ntfy", first_title_data, show_source=True)
  1001. elif format_type == "feishu":
  1002. formatted_title = format_title_for_platform("feishu", first_title_data, show_source=True)
  1003. elif format_type == "dingtalk":
  1004. formatted_title = format_title_for_platform("dingtalk", first_title_data, show_source=True)
  1005. elif format_type == "slack":
  1006. formatted_title = format_title_for_platform("slack", first_title_data, show_source=True)
  1007. else:
  1008. formatted_title = f"{first_title_data['title']}"
  1009. first_news_line = f" 1. {formatted_title}\n"
  1010. if len(stat["titles"]) > 1:
  1011. first_news_line += "\n"
  1012. # 原子性检查:关键词标题 + 第一条新闻必须一起处理
  1013. word_with_first_news = word_header + first_news_line
  1014. test_content = current_batch + word_with_first_news
  1015. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  1016. if current_batch_has_content:
  1017. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  1018. current_batch = _safe_new_batch(
  1019. base_header + rss_header + word_with_first_news,
  1020. base_footer, max_bytes, base_header, batches
  1021. )
  1022. current_batch_has_content = True
  1023. start_index = 1
  1024. else:
  1025. current_batch = test_content
  1026. current_batch_has_content = True
  1027. start_index = 1
  1028. # 处理剩余新闻条目
  1029. for j in range(start_index, len(stat["titles"])):
  1030. title_data = stat["titles"][j]
  1031. if format_type in ("wework", "bark"):
  1032. formatted_title = format_title_for_platform("wework", title_data, show_source=True)
  1033. elif format_type == "telegram":
  1034. formatted_title = format_title_for_platform("telegram", title_data, show_source=True)
  1035. elif format_type == "ntfy":
  1036. formatted_title = format_title_for_platform("ntfy", title_data, show_source=True)
  1037. elif format_type == "feishu":
  1038. formatted_title = format_title_for_platform("feishu", title_data, show_source=True)
  1039. elif format_type == "dingtalk":
  1040. formatted_title = format_title_for_platform("dingtalk", title_data, show_source=True)
  1041. elif format_type == "slack":
  1042. formatted_title = format_title_for_platform("slack", title_data, show_source=True)
  1043. else:
  1044. formatted_title = f"{title_data['title']}"
  1045. news_line = f" {j + 1}. {formatted_title}\n"
  1046. if j < len(stat["titles"]) - 1:
  1047. news_line += "\n"
  1048. test_content = current_batch + news_line
  1049. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  1050. if current_batch_has_content:
  1051. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  1052. current_batch = _safe_new_batch(
  1053. base_header + rss_header + word_header + news_line,
  1054. base_footer, max_bytes, base_header, batches
  1055. )
  1056. current_batch_has_content = True
  1057. else:
  1058. current_batch = test_content
  1059. current_batch_has_content = True
  1060. # 关键词间分隔符
  1061. if i < len(rss_stats) - 1:
  1062. separator = ""
  1063. if format_type in ("wework", "bark"):
  1064. separator = "\n\n\n\n"
  1065. elif format_type == "telegram":
  1066. separator = "\n\n"
  1067. elif format_type == "ntfy":
  1068. separator = "\n\n"
  1069. elif format_type == "feishu":
  1070. separator = f"\n{feishu_separator}\n\n"
  1071. elif format_type == "dingtalk":
  1072. separator = "\n---\n\n"
  1073. elif format_type == "slack":
  1074. separator = "\n\n"
  1075. test_content = current_batch + separator
  1076. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) < max_bytes:
  1077. current_batch = test_content
  1078. return current_batch, current_batch_has_content, batches
  1079. def _process_rss_new_titles_section(
  1080. rss_new_stats: list,
  1081. format_type: str,
  1082. feishu_separator: str,
  1083. base_header: str,
  1084. base_footer: str,
  1085. max_bytes: int,
  1086. current_batch: str,
  1087. current_batch_has_content: bool,
  1088. batches: List[str],
  1089. timezone: str = DEFAULT_TIMEZONE,
  1090. add_separator: bool = True,
  1091. ) -> tuple:
  1092. """处理 RSS 新增区块(按来源分组,与热榜新增格式一致)
  1093. Args:
  1094. rss_new_stats: RSS 新增关键词统计列表,格式与热榜 stats 一致:
  1095. [{"word": "AI", "count": 5, "titles": [...]}]
  1096. format_type: 格式类型
  1097. feishu_separator: 飞书分隔符
  1098. base_header: 基础头部
  1099. base_footer: 基础尾部
  1100. max_bytes: 最大字节数
  1101. current_batch: 当前批次内容
  1102. current_batch_has_content: 当前批次是否有内容
  1103. batches: 已完成的批次列表
  1104. timezone: 时区名称
  1105. add_separator: 是否在区块前添加分割线(第一个区域时为 False)
  1106. Returns:
  1107. (current_batch, current_batch_has_content, batches) 元组
  1108. """
  1109. if not rss_new_stats:
  1110. return current_batch, current_batch_has_content, batches
  1111. # 从关键词分组中提取所有条目,重新按来源分组
  1112. source_map = {}
  1113. for stat in rss_new_stats:
  1114. for title_data in stat.get("titles", []):
  1115. source_name = title_data.get("source_name", "未知来源")
  1116. if source_name not in source_map:
  1117. source_map[source_name] = []
  1118. source_map[source_name].append(title_data)
  1119. if not source_map:
  1120. return current_batch, current_batch_has_content, batches
  1121. # 计算总条目数
  1122. total_items = sum(len(titles) for titles in source_map.values())
  1123. # RSS 新增区块标题(根据 add_separator 决定是否添加前置分割线)
  1124. new_header = ""
  1125. if add_separator and current_batch_has_content:
  1126. # 需要添加分割线
  1127. if format_type in ("wework", "bark"):
  1128. new_header = f"\n\n\n\n🆕 **RSS 本次新增** (共 {total_items} 条)\n\n"
  1129. elif format_type == "telegram":
  1130. new_header = f"\n\n🆕 RSS 本次新增 (共 {total_items} 条)\n\n"
  1131. elif format_type == "ntfy":
  1132. new_header = f"\n\n🆕 **RSS 本次新增** (共 {total_items} 条)\n\n"
  1133. elif format_type == "feishu":
  1134. new_header = f"\n{feishu_separator}\n\n🆕 **RSS 本次新增** (共 {total_items} 条)\n\n"
  1135. elif format_type == "dingtalk":
  1136. new_header = f"\n---\n\n🆕 **RSS 本次新增** (共 {total_items} 条)\n\n"
  1137. elif format_type == "slack":
  1138. new_header = f"\n\n🆕 *RSS 本次新增* (共 {total_items} 条)\n\n"
  1139. else:
  1140. # 不需要分割线(第一个区域)
  1141. if format_type in ("wework", "bark"):
  1142. new_header = f"🆕 **RSS 本次新增** (共 {total_items} 条)\n\n"
  1143. elif format_type == "telegram":
  1144. new_header = f"🆕 RSS 本次新增 (共 {total_items} 条)\n\n"
  1145. elif format_type == "ntfy":
  1146. new_header = f"🆕 **RSS 本次新增** (共 {total_items} 条)\n\n"
  1147. elif format_type == "feishu":
  1148. new_header = f"🆕 **RSS 本次新增** (共 {total_items} 条)\n\n"
  1149. elif format_type == "dingtalk":
  1150. new_header = f"🆕 **RSS 本次新增** (共 {total_items} 条)\n\n"
  1151. elif format_type == "slack":
  1152. new_header = f"🆕 *RSS 本次新增* (共 {total_items} 条)\n\n"
  1153. # 添加 RSS 新增标题
  1154. test_content = current_batch + new_header
  1155. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  1156. if current_batch_has_content:
  1157. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  1158. current_batch = _safe_new_batch(
  1159. base_header + new_header, base_footer, max_bytes, base_header, batches
  1160. )
  1161. current_batch_has_content = True
  1162. else:
  1163. current_batch = test_content
  1164. current_batch_has_content = True
  1165. # 按来源分组显示(与热榜新增格式一致)
  1166. source_list = list(source_map.items())
  1167. for i, (source_name, titles) in enumerate(source_list):
  1168. count = len(titles)
  1169. # 构建来源标题(与热榜新增格式一致)
  1170. source_header = ""
  1171. if format_type in ("wework", "bark"):
  1172. source_header = f"**{source_name}** ({count} 条):\n\n"
  1173. elif format_type == "telegram":
  1174. source_header = f"{source_name} ({count} 条):\n\n"
  1175. elif format_type == "ntfy":
  1176. source_header = f"**{source_name}** ({count} 条):\n\n"
  1177. elif format_type == "feishu":
  1178. source_header = f"**{source_name}** ({count} 条):\n\n"
  1179. elif format_type == "dingtalk":
  1180. source_header = f"**{source_name}** ({count} 条):\n\n"
  1181. elif format_type == "slack":
  1182. source_header = f"*{source_name}* ({count} 条):\n\n"
  1183. # 构建第一条新闻(不显示来源,禁用 new emoji)
  1184. first_news_line = ""
  1185. if titles:
  1186. first_title_data = titles[0].copy()
  1187. first_title_data["is_new"] = False
  1188. if format_type in ("wework", "bark"):
  1189. formatted_title = format_title_for_platform("wework", first_title_data, show_source=False)
  1190. elif format_type == "telegram":
  1191. formatted_title = format_title_for_platform("telegram", first_title_data, show_source=False)
  1192. elif format_type == "ntfy":
  1193. formatted_title = format_title_for_platform("ntfy", first_title_data, show_source=False)
  1194. elif format_type == "feishu":
  1195. formatted_title = format_title_for_platform("feishu", first_title_data, show_source=False)
  1196. elif format_type == "dingtalk":
  1197. formatted_title = format_title_for_platform("dingtalk", first_title_data, show_source=False)
  1198. elif format_type == "slack":
  1199. formatted_title = format_title_for_platform("slack", first_title_data, show_source=False)
  1200. else:
  1201. formatted_title = f"{first_title_data['title']}"
  1202. first_news_line = f" 1. {formatted_title}\n"
  1203. # 原子性检查:来源标题 + 第一条新闻必须一起处理
  1204. source_with_first_news = source_header + first_news_line
  1205. test_content = current_batch + source_with_first_news
  1206. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  1207. if current_batch_has_content:
  1208. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  1209. current_batch = _safe_new_batch(
  1210. base_header + new_header + source_with_first_news,
  1211. base_footer, max_bytes, base_header, batches
  1212. )
  1213. current_batch_has_content = True
  1214. start_index = 1
  1215. else:
  1216. current_batch = test_content
  1217. current_batch_has_content = True
  1218. start_index = 1
  1219. # 处理剩余新闻条目(禁用 new emoji)
  1220. for j in range(start_index, len(titles)):
  1221. title_data = titles[j].copy()
  1222. title_data["is_new"] = False
  1223. if format_type in ("wework", "bark"):
  1224. formatted_title = format_title_for_platform("wework", title_data, show_source=False)
  1225. elif format_type == "telegram":
  1226. formatted_title = format_title_for_platform("telegram", title_data, show_source=False)
  1227. elif format_type == "ntfy":
  1228. formatted_title = format_title_for_platform("ntfy", title_data, show_source=False)
  1229. elif format_type == "feishu":
  1230. formatted_title = format_title_for_platform("feishu", title_data, show_source=False)
  1231. elif format_type == "dingtalk":
  1232. formatted_title = format_title_for_platform("dingtalk", title_data, show_source=False)
  1233. elif format_type == "slack":
  1234. formatted_title = format_title_for_platform("slack", title_data, show_source=False)
  1235. else:
  1236. formatted_title = f"{title_data['title']}"
  1237. news_line = f" {j + 1}. {formatted_title}\n"
  1238. test_content = current_batch + news_line
  1239. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  1240. if current_batch_has_content:
  1241. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  1242. current_batch = _safe_new_batch(
  1243. base_header + new_header + source_header + news_line,
  1244. base_footer, max_bytes, base_header, batches
  1245. )
  1246. current_batch_has_content = True
  1247. else:
  1248. current_batch = test_content
  1249. current_batch_has_content = True
  1250. # 来源间添加空行(与热榜新增格式一致)
  1251. current_batch += "\n"
  1252. return current_batch, current_batch_has_content, batches
  1253. def _format_rss_item_line(
  1254. item: Dict,
  1255. index: int,
  1256. format_type: str,
  1257. timezone: str = DEFAULT_TIMEZONE,
  1258. ) -> str:
  1259. """格式化单条 RSS 条目
  1260. Args:
  1261. item: RSS 条目字典
  1262. index: 序号
  1263. format_type: 格式类型
  1264. timezone: 时区名称
  1265. Returns:
  1266. 格式化后的条目行字符串
  1267. """
  1268. title = item.get("title", "")
  1269. url = item.get("url", "")
  1270. published_at = item.get("published_at", "")
  1271. # 使用友好时间格式
  1272. if published_at:
  1273. friendly_time = format_iso_time_friendly(published_at, timezone, include_date=True)
  1274. else:
  1275. friendly_time = ""
  1276. # 构建条目行
  1277. if format_type == "feishu":
  1278. if url:
  1279. item_line = f" {index}. [{title}]({url})"
  1280. else:
  1281. item_line = f" {index}. {title}"
  1282. if friendly_time:
  1283. item_line += f" <font color='grey'>- {friendly_time}</font>"
  1284. elif format_type == "telegram":
  1285. if url:
  1286. item_line = f" {index}. {title} ({url})"
  1287. else:
  1288. item_line = f" {index}. {title}"
  1289. if friendly_time:
  1290. item_line += f" - {friendly_time}"
  1291. else:
  1292. if url:
  1293. item_line = f" {index}. [{title}]({url})"
  1294. else:
  1295. item_line = f" {index}. {title}"
  1296. if friendly_time:
  1297. item_line += f" `{friendly_time}`"
  1298. item_line += "\n"
  1299. return item_line
  1300. def _process_standalone_section(
  1301. standalone_data: Dict,
  1302. format_type: str,
  1303. feishu_separator: str,
  1304. base_header: str,
  1305. base_footer: str,
  1306. max_bytes: int,
  1307. current_batch: str,
  1308. current_batch_has_content: bool,
  1309. batches: List[str],
  1310. timezone: str = DEFAULT_TIMEZONE,
  1311. rank_threshold: int = 10,
  1312. add_separator: bool = True,
  1313. ) -> tuple:
  1314. """处理独立展示区区块
  1315. 独立展示区显示指定平台的完整热榜或 RSS 源内容,不受关键词过滤影响。
  1316. 热榜按原始排名排序,RSS 按发布时间排序。
  1317. Args:
  1318. standalone_data: 独立展示数据,格式:
  1319. {
  1320. "platforms": [{"id": "zhihu", "name": "知乎热榜", "items": [...]}],
  1321. "rss_feeds": [{"id": "hacker-news", "name": "Hacker News", "items": [...]}]
  1322. }
  1323. format_type: 格式类型
  1324. feishu_separator: 飞书分隔符
  1325. base_header: 基础头部
  1326. base_footer: 基础尾部
  1327. max_bytes: 最大字节数
  1328. current_batch: 当前批次内容
  1329. current_batch_has_content: 当前批次是否有内容
  1330. batches: 已完成的批次列表
  1331. timezone: 时区名称
  1332. rank_threshold: 排名高亮阈值
  1333. add_separator: 是否在区块前添加分割线(第一个区域时为 False)
  1334. Returns:
  1335. (current_batch, current_batch_has_content, batches) 元组
  1336. """
  1337. if not standalone_data:
  1338. return current_batch, current_batch_has_content, batches
  1339. platforms = standalone_data.get("platforms", [])
  1340. rss_feeds = standalone_data.get("rss_feeds", [])
  1341. if not platforms and not rss_feeds:
  1342. return current_batch, current_batch_has_content, batches
  1343. # 计算总条目数
  1344. total_platform_items = sum(len(p.get("items", [])) for p in platforms)
  1345. total_rss_items = sum(len(f.get("items", [])) for f in rss_feeds)
  1346. total_items = total_platform_items + total_rss_items
  1347. # 独立展示区标题(根据 add_separator 决定是否添加前置分割线)
  1348. section_header = ""
  1349. if add_separator and current_batch_has_content:
  1350. # 需要添加分割线
  1351. if format_type == "feishu":
  1352. section_header = f"\n{feishu_separator}\n\n📋 **独立展示区** (共 {total_items} 条)\n\n"
  1353. elif format_type == "dingtalk":
  1354. section_header = f"\n---\n\n📋 **独立展示区** (共 {total_items} 条)\n\n"
  1355. elif format_type in ("wework", "bark"):
  1356. section_header = f"\n\n\n\n📋 **独立展示区** (共 {total_items} 条)\n\n"
  1357. elif format_type == "telegram":
  1358. section_header = f"\n\n📋 独立展示区 (共 {total_items} 条)\n\n"
  1359. elif format_type == "slack":
  1360. section_header = f"\n\n📋 *独立展示区* (共 {total_items} 条)\n\n"
  1361. else:
  1362. section_header = f"\n\n📋 **独立展示区** (共 {total_items} 条)\n\n"
  1363. else:
  1364. # 不需要分割线(第一个区域)
  1365. if format_type == "feishu":
  1366. section_header = f"📋 **独立展示区** (共 {total_items} 条)\n\n"
  1367. elif format_type == "dingtalk":
  1368. section_header = f"📋 **独立展示区** (共 {total_items} 条)\n\n"
  1369. elif format_type == "telegram":
  1370. section_header = f"📋 独立展示区 (共 {total_items} 条)\n\n"
  1371. elif format_type == "slack":
  1372. section_header = f"📋 *独立展示区* (共 {total_items} 条)\n\n"
  1373. else:
  1374. section_header = f"📋 **独立展示区** (共 {total_items} 条)\n\n"
  1375. # 添加区块标题
  1376. test_content = current_batch + section_header
  1377. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) < max_bytes:
  1378. current_batch = test_content
  1379. current_batch_has_content = True
  1380. else:
  1381. if current_batch_has_content:
  1382. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  1383. current_batch = _safe_new_batch(
  1384. base_header + section_header, base_footer, max_bytes, base_header, batches
  1385. )
  1386. current_batch_has_content = True
  1387. # 处理热榜平台
  1388. for platform in platforms:
  1389. platform_name = platform.get("name", platform.get("id", ""))
  1390. items = platform.get("items", [])
  1391. if not items:
  1392. continue
  1393. # 平台标题
  1394. platform_header = ""
  1395. if format_type in ("wework", "bark"):
  1396. platform_header = f"**{platform_name}** ({len(items)} 条):\n\n"
  1397. elif format_type == "telegram":
  1398. platform_header = f"{platform_name} ({len(items)} 条):\n\n"
  1399. elif format_type == "ntfy":
  1400. platform_header = f"**{platform_name}** ({len(items)} 条):\n\n"
  1401. elif format_type == "feishu":
  1402. platform_header = f"**{platform_name}** ({len(items)} 条):\n\n"
  1403. elif format_type == "dingtalk":
  1404. platform_header = f"**{platform_name}** ({len(items)} 条):\n\n"
  1405. elif format_type == "slack":
  1406. platform_header = f"*{platform_name}* ({len(items)} 条):\n\n"
  1407. # 构建第一条新闻
  1408. first_item_line = ""
  1409. if items:
  1410. first_item_line = _format_standalone_platform_item(items[0], 1, format_type, rank_threshold)
  1411. # 原子性检查
  1412. platform_with_first = platform_header + first_item_line
  1413. test_content = current_batch + platform_with_first
  1414. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  1415. if current_batch_has_content:
  1416. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  1417. current_batch = _safe_new_batch(
  1418. base_header + section_header + platform_with_first,
  1419. base_footer, max_bytes, base_header, batches
  1420. )
  1421. current_batch_has_content = True
  1422. start_index = 1
  1423. else:
  1424. current_batch = test_content
  1425. current_batch_has_content = True
  1426. start_index = 1
  1427. # 处理剩余条目
  1428. for j in range(start_index, len(items)):
  1429. item_line = _format_standalone_platform_item(items[j], j + 1, format_type, rank_threshold)
  1430. test_content = current_batch + item_line
  1431. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  1432. if current_batch_has_content:
  1433. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  1434. current_batch = _safe_new_batch(
  1435. base_header + section_header + platform_header + item_line,
  1436. base_footer, max_bytes, base_header, batches
  1437. )
  1438. current_batch_has_content = True
  1439. else:
  1440. current_batch = test_content
  1441. current_batch_has_content = True
  1442. current_batch += "\n"
  1443. # 处理 RSS 源
  1444. for feed in rss_feeds:
  1445. feed_name = feed.get("name", feed.get("id", ""))
  1446. items = feed.get("items", [])
  1447. if not items:
  1448. continue
  1449. # RSS 源标题
  1450. feed_header = ""
  1451. if format_type in ("wework", "bark"):
  1452. feed_header = f"**{feed_name}** ({len(items)} 条):\n\n"
  1453. elif format_type == "telegram":
  1454. feed_header = f"{feed_name} ({len(items)} 条):\n\n"
  1455. elif format_type == "ntfy":
  1456. feed_header = f"**{feed_name}** ({len(items)} 条):\n\n"
  1457. elif format_type == "feishu":
  1458. feed_header = f"**{feed_name}** ({len(items)} 条):\n\n"
  1459. elif format_type == "dingtalk":
  1460. feed_header = f"**{feed_name}** ({len(items)} 条):\n\n"
  1461. elif format_type == "slack":
  1462. feed_header = f"*{feed_name}* ({len(items)} 条):\n\n"
  1463. # 构建第一条 RSS
  1464. first_item_line = ""
  1465. if items:
  1466. first_item_line = _format_standalone_rss_item(items[0], 1, format_type, timezone)
  1467. # 原子性检查
  1468. feed_with_first = feed_header + first_item_line
  1469. test_content = current_batch + feed_with_first
  1470. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  1471. if current_batch_has_content:
  1472. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  1473. current_batch = _safe_new_batch(
  1474. base_header + section_header + feed_with_first,
  1475. base_footer, max_bytes, base_header, batches
  1476. )
  1477. current_batch_has_content = True
  1478. start_index = 1
  1479. else:
  1480. current_batch = test_content
  1481. current_batch_has_content = True
  1482. start_index = 1
  1483. # 处理剩余条目
  1484. for j in range(start_index, len(items)):
  1485. item_line = _format_standalone_rss_item(items[j], j + 1, format_type, timezone)
  1486. test_content = current_batch + item_line
  1487. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  1488. if current_batch_has_content:
  1489. _safe_append_batch(batches, current_batch, base_footer, max_bytes, base_header)
  1490. current_batch = _safe_new_batch(
  1491. base_header + section_header + feed_header + item_line,
  1492. base_footer, max_bytes, base_header, batches
  1493. )
  1494. current_batch_has_content = True
  1495. else:
  1496. current_batch = test_content
  1497. current_batch_has_content = True
  1498. current_batch += "\n"
  1499. return current_batch, current_batch_has_content, batches
  1500. def _format_standalone_platform_item(item: Dict, index: int, format_type: str, rank_threshold: int = 10) -> str:
  1501. """格式化独立展示区的热榜条目(复用热点词汇统计区样式)
  1502. Args:
  1503. item: 热榜条目,包含 title, url, rank, ranks, first_time, last_time, count
  1504. index: 序号
  1505. format_type: 格式类型
  1506. rank_threshold: 排名高亮阈值
  1507. Returns:
  1508. 格式化后的条目行字符串
  1509. """
  1510. title = item.get("title", "")
  1511. url = item.get("url", "") or item.get("mobileUrl", "")
  1512. ranks = item.get("ranks", [])
  1513. rank = item.get("rank", 0)
  1514. first_time = item.get("first_time", "")
  1515. last_time = item.get("last_time", "")
  1516. count = item.get("count", 1)
  1517. # 使用 format_rank_display 格式化排名(复用热点词汇统计区逻辑)
  1518. # 如果没有 ranks 列表,用单个 rank 构造
  1519. if not ranks and rank > 0:
  1520. ranks = [rank]
  1521. rank_display = format_rank_display(ranks, rank_threshold, format_type) if ranks else ""
  1522. # 构建时间显示(用 ~ 连接范围,与热点词汇统计区一致)
  1523. # 将 HH-MM 格式转换为 HH:MM 格式
  1524. time_display = ""
  1525. if first_time and last_time and first_time != last_time:
  1526. first_time_display = convert_time_for_display(first_time)
  1527. last_time_display = convert_time_for_display(last_time)
  1528. time_display = f"{first_time_display}~{last_time_display}"
  1529. elif first_time:
  1530. time_display = convert_time_for_display(first_time)
  1531. # 构建次数显示(格式为 (N次),与热点词汇统计区一致)
  1532. count_display = f"({count}次)" if count > 1 else ""
  1533. # 根据格式类型构建条目行(复用热点词汇统计区样式)
  1534. if format_type == "feishu":
  1535. if url:
  1536. item_line = f" {index}. [{title}]({url})"
  1537. else:
  1538. item_line = f" {index}. {title}"
  1539. if rank_display:
  1540. item_line += f" {rank_display}"
  1541. if time_display:
  1542. item_line += f" <font color='grey'>- {time_display}</font>"
  1543. if count_display:
  1544. item_line += f" <font color='green'>{count_display}</font>"
  1545. elif format_type == "dingtalk":
  1546. if url:
  1547. item_line = f" {index}. [{title}]({url})"
  1548. else:
  1549. item_line = f" {index}. {title}"
  1550. if rank_display:
  1551. item_line += f" {rank_display}"
  1552. if time_display:
  1553. item_line += f" - {time_display}"
  1554. if count_display:
  1555. item_line += f" {count_display}"
  1556. elif format_type == "telegram":
  1557. if url:
  1558. item_line = f" {index}. {title} ({url})"
  1559. else:
  1560. item_line = f" {index}. {title}"
  1561. if rank_display:
  1562. item_line += f" {rank_display}"
  1563. if time_display:
  1564. item_line += f" - {time_display}"
  1565. if count_display:
  1566. item_line += f" {count_display}"
  1567. elif format_type == "slack":
  1568. if url:
  1569. item_line = f" {index}. <{url}|{title}>"
  1570. else:
  1571. item_line = f" {index}. {title}"
  1572. if rank_display:
  1573. item_line += f" {rank_display}"
  1574. if time_display:
  1575. item_line += f" _{time_display}_"
  1576. if count_display:
  1577. item_line += f" {count_display}"
  1578. else:
  1579. # wework, bark, ntfy
  1580. if url:
  1581. item_line = f" {index}. [{title}]({url})"
  1582. else:
  1583. item_line = f" {index}. {title}"
  1584. if rank_display:
  1585. item_line += f" {rank_display}"
  1586. if time_display:
  1587. item_line += f" - {time_display}"
  1588. if count_display:
  1589. item_line += f" {count_display}"
  1590. item_line += "\n"
  1591. return item_line
  1592. def _format_standalone_rss_item(
  1593. item: Dict, index: int, format_type: str, timezone: str = "Asia/Shanghai"
  1594. ) -> str:
  1595. """格式化独立展示区的 RSS 条目
  1596. Args:
  1597. item: RSS 条目,包含 title, url, published_at, author
  1598. index: 序号
  1599. format_type: 格式类型
  1600. timezone: 时区名称
  1601. Returns:
  1602. 格式化后的条目行字符串
  1603. """
  1604. title = item.get("title", "")
  1605. url = item.get("url", "")
  1606. published_at = item.get("published_at", "")
  1607. author = item.get("author", "")
  1608. # 使用友好时间格式
  1609. friendly_time = ""
  1610. if published_at:
  1611. friendly_time = format_iso_time_friendly(published_at, timezone, include_date=True)
  1612. # 构建元信息
  1613. meta_parts = []
  1614. if friendly_time:
  1615. meta_parts.append(friendly_time)
  1616. if author:
  1617. meta_parts.append(author)
  1618. meta_str = ", ".join(meta_parts)
  1619. # 根据格式类型构建条目行
  1620. if format_type == "feishu":
  1621. if url:
  1622. item_line = f" {index}. [{title}]({url})"
  1623. else:
  1624. item_line = f" {index}. {title}"
  1625. if meta_str:
  1626. item_line += f" <font color='grey'>- {meta_str}</font>"
  1627. elif format_type == "telegram":
  1628. if url:
  1629. item_line = f" {index}. {title} ({url})"
  1630. else:
  1631. item_line = f" {index}. {title}"
  1632. if meta_str:
  1633. item_line += f" - {meta_str}"
  1634. elif format_type == "slack":
  1635. if url:
  1636. item_line = f" {index}. <{url}|{title}>"
  1637. else:
  1638. item_line = f" {index}. {title}"
  1639. if meta_str:
  1640. item_line += f" _{meta_str}_"
  1641. else:
  1642. # wework, bark, ntfy, dingtalk
  1643. if url:
  1644. item_line = f" {index}. [{title}]({url})"
  1645. else:
  1646. item_line = f" {index}. {title}"
  1647. if meta_str:
  1648. item_line += f" `{meta_str}`"
  1649. item_line += "\n"
  1650. return item_line