splitter.py 72 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672
  1. # coding=utf-8
  2. """
  3. 消息分批处理模块
  4. 提供消息内容分批拆分功能,确保消息大小不超过各平台限制
  5. """
  6. from datetime import datetime
  7. from typing import Dict, List, Optional, Callable
  8. from trendradar.report.formatter import format_title_for_platform
  9. from trendradar.report.helpers import format_rank_display
  10. from trendradar.utils.time import DEFAULT_TIMEZONE, format_iso_time_friendly, convert_time_for_display
  # Default per-platform size limit (compared against the UTF-8 byte length of
  # a batch); "default" is used for every format without its own entry.
  DEFAULT_BATCH_SIZES: Dict[str, int] = dict(
      dingtalk=20000,
      feishu=29000,
      ntfy=3800,
      default=4000,
  )
  # Default order in which the report regions are processed.
  DEFAULT_REGION_ORDER: List[str] = [
      "hotlist",
      "rss",
      "new_items",
      "standalone",
      "ai_analysis",
  ]
  20. def split_content_into_batches(
  21. report_data: Dict,
  22. format_type: str,
  23. update_info: Optional[Dict] = None,
  24. max_bytes: Optional[int] = None,
  25. mode: str = "daily",
  26. batch_sizes: Optional[Dict[str, int]] = None,
  27. feishu_separator: str = "---",
  28. region_order: Optional[List[str]] = None,
  29. get_time_func: Optional[Callable[[], datetime]] = None,
  30. rss_items: Optional[list] = None,
  31. rss_new_items: Optional[list] = None,
  32. timezone: str = DEFAULT_TIMEZONE,
  33. display_mode: str = "keyword",
  34. ai_content: Optional[str] = None,
  35. standalone_data: Optional[Dict] = None,
  36. rank_threshold: int = 10,
  37. ai_stats: Optional[Dict] = None,
  38. report_type: str = "热点分析报告",
  39. show_new_section: bool = True,
  40. ) -> List[str]:
  41. """分批处理消息内容,确保词组标题+至少第一条新闻的完整性(支持热榜+RSS合并+AI分析+独立展示区)
  42. 热榜统计与RSS统计并列显示,热榜新增与RSS新增并列显示。
  43. region_order 控制各区域的显示顺序。
  44. AI分析内容根据 region_order 中的位置显示。
  45. 独立展示区根据 region_order 中的位置显示。
  46. Args:
  47. report_data: 报告数据字典,包含 stats, new_titles, failed_ids, total_new_count
  48. format_type: 格式类型 (feishu, dingtalk, wework, telegram, ntfy, bark, slack)
  49. update_info: 版本更新信息(可选)
  50. max_bytes: 最大字节数(可选,如果不指定则使用默认配置)
  51. mode: 报告模式 (daily, incremental, current)
  52. batch_sizes: 批次大小配置字典(可选)
  53. feishu_separator: 飞书消息分隔符
  54. region_order: 区域显示顺序列表
  55. get_time_func: 获取当前时间的函数(可选)
  56. rss_items: RSS 统计条目列表(按源分组,用于合并推送)
  57. rss_new_items: RSS 新增条目列表(可选,用于新增区块)
  58. timezone: 时区名称(用于 RSS 时间格式化)
  59. display_mode: 显示模式 (keyword=按关键词分组, platform=按平台分组)
  60. ai_content: AI 分析内容(已渲染的字符串,可选)
  61. standalone_data: 独立展示区数据(可选),包含 platforms 和 rss_feeds 列表
  62. ai_stats: AI 分析统计数据(可选),包含 total_news, analyzed_news, max_news_limit 等
  63. Returns:
  64. 分批后的消息内容列表
  65. """
  66. if region_order is None:
  67. region_order = DEFAULT_REGION_ORDER
  68. # 合并批次大小配置
  69. sizes = {**DEFAULT_BATCH_SIZES, **(batch_sizes or {})}
  70. if max_bytes is None:
  71. if format_type == "dingtalk":
  72. max_bytes = sizes.get("dingtalk", 20000)
  73. elif format_type == "feishu":
  74. max_bytes = sizes.get("feishu", 29000)
  75. elif format_type == "ntfy":
  76. max_bytes = sizes.get("ntfy", 3800)
  77. else:
  78. max_bytes = sizes.get("default", 4000)
  79. batches = []
  80. total_hotlist_count = sum(
  81. len(stat["titles"]) for stat in report_data["stats"] if stat["count"] > 0
  82. )
  83. total_titles = total_hotlist_count
  84. # 累加 RSS 条目数
  85. if rss_items:
  86. total_titles += sum(stat.get("count", 0) for stat in rss_items)
  87. now = get_time_func() if get_time_func else datetime.now()
  88. # 构建头部信息
  89. base_header = ""
  90. # 准备 AI 分析统计行(如果存在)
  91. ai_stats_line = ""
  92. if ai_stats and ai_stats.get("analyzed_news", 0) > 0:
  93. analyzed_news = ai_stats.get("analyzed_news", 0)
  94. total_news = ai_stats.get("total_news", 0)
  95. ai_mode = ai_stats.get("ai_mode", "")
  96. # 构建分析数显示:如果被截断则显示 "实际分析数/总可分析数"
  97. if total_news > analyzed_news:
  98. news_display = f"{analyzed_news}/{total_news}"
  99. else:
  100. news_display = str(analyzed_news)
  101. # 如果 AI 模式与推送模式不同,显示模式标识
  102. mode_suffix = ""
  103. if ai_mode and ai_mode != mode:
  104. mode_map = {
  105. "daily": "全天汇总",
  106. "current": "当前榜单",
  107. "incremental": "增量分析"
  108. }
  109. mode_label = mode_map.get(ai_mode, ai_mode)
  110. mode_suffix = f" ({mode_label})"
  111. if format_type in ("wework", "bark", "ntfy", "feishu", "dingtalk"):
  112. ai_stats_line = f"**AI 分析数:** {news_display}{mode_suffix}\n"
  113. elif format_type == "slack":
  114. ai_stats_line = f"*AI 分析数:* {news_display}{mode_suffix}\n"
  115. elif format_type == "telegram":
  116. ai_stats_line = f"AI 分析数: {news_display}{mode_suffix}\n"
  117. # 构建统一的头部(总是显示总新闻数、时间和类型)
  118. if format_type in ("wework", "bark"):
  119. base_header = f"**总新闻数:** {total_titles}\n"
  120. base_header += ai_stats_line
  121. base_header += f"**时间:** {now.strftime('%Y-%m-%d %H:%M:%S')}\n"
  122. base_header += f"**类型:** {report_type}\n\n"
  123. elif format_type == "telegram":
  124. base_header = f"总新闻数: {total_titles}\n"
  125. base_header += ai_stats_line
  126. base_header += f"时间: {now.strftime('%Y-%m-%d %H:%M:%S')}\n"
  127. base_header += f"类型: {report_type}\n\n"
  128. elif format_type == "ntfy":
  129. base_header = f"**总新闻数:** {total_titles}\n"
  130. base_header += ai_stats_line
  131. base_header += f"**时间:** {now.strftime('%Y-%m-%d %H:%M:%S')}\n"
  132. base_header += f"**类型:** {report_type}\n\n"
  133. elif format_type == "feishu":
  134. base_header = f"**总新闻数:** {total_titles}\n"
  135. base_header += ai_stats_line
  136. base_header += f"**时间:** {now.strftime('%Y-%m-%d %H:%M:%S')}\n"
  137. base_header += f"**类型:** {report_type}\n\n"
  138. base_header += "---\n\n"
  139. elif format_type == "dingtalk":
  140. base_header = f"**总新闻数:** {total_titles}\n"
  141. base_header += ai_stats_line
  142. base_header += f"**时间:** {now.strftime('%Y-%m-%d %H:%M:%S')}\n"
  143. base_header += f"**类型:** {report_type}\n\n"
  144. base_header += "---\n\n"
  145. elif format_type == "slack":
  146. base_header = f"*总新闻数:* {total_titles}\n"
  147. base_header += ai_stats_line
  148. base_header += f"*时间:* {now.strftime('%Y-%m-%d %H:%M:%S')}\n"
  149. base_header += f"*类型:* {report_type}\n\n"
  150. base_footer = ""
  151. if format_type in ("wework", "bark"):
  152. base_footer = f"\n\n\n> 更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}"
  153. if update_info:
  154. base_footer += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**,当前 **{update_info['current_version']}**"
  155. elif format_type == "telegram":
  156. base_footer = f"\n\n更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}"
  157. if update_info:
  158. base_footer += f"\nTrendRadar 发现新版本 {update_info['remote_version']},当前 {update_info['current_version']}"
  159. elif format_type == "ntfy":
  160. base_footer = f"\n\n> 更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}"
  161. if update_info:
  162. base_footer += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**,当前 **{update_info['current_version']}**"
  163. elif format_type == "feishu":
  164. base_footer = f"\n\n<font color='grey'>更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}</font>"
  165. if update_info:
  166. base_footer += f"\n<font color='grey'>TrendRadar 发现新版本 {update_info['remote_version']},当前 {update_info['current_version']}</font>"
  167. elif format_type == "dingtalk":
  168. base_footer = f"\n\n> 更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}"
  169. if update_info:
  170. base_footer += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**,当前 **{update_info['current_version']}**"
  171. elif format_type == "slack":
  172. base_footer = f"\n\n_更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}_"
  173. if update_info:
  174. base_footer += f"\n_TrendRadar 发现新版本 *{update_info['remote_version']}*,当前 *{update_info['current_version']}_"
  175. # 根据 display_mode 选择统计标题
  176. stats_title = "热点词汇统计" if display_mode == "keyword" else "热点新闻统计"
  177. stats_header = ""
  178. if report_data["stats"]:
  179. if format_type in ("wework", "bark"):
  180. stats_header = f"📊 **{stats_title}** (共 {total_hotlist_count} 条)\n\n"
  181. elif format_type == "telegram":
  182. stats_header = f"📊 {stats_title} (共 {total_hotlist_count} 条)\n\n"
  183. elif format_type == "ntfy":
  184. stats_header = f"📊 **{stats_title}** (共 {total_hotlist_count} 条)\n\n"
  185. elif format_type == "feishu":
  186. stats_header = f"📊 **{stats_title}** (共 {total_hotlist_count} 条)\n\n"
  187. elif format_type == "dingtalk":
  188. stats_header = f"📊 **{stats_title}** (共 {total_hotlist_count} 条)\n\n"
  189. elif format_type == "slack":
  190. stats_header = f"📊 *{stats_title}* (共 {total_hotlist_count} 条)\n\n"
  191. current_batch = base_header
  192. current_batch_has_content = False
  193. # 当没有热榜数据时的处理
  194. # 注意:如果有 ai_content,不应该返回"暂无匹配"消息,而应该继续处理 AI 内容
  195. if (
  196. not report_data["stats"]
  197. and not report_data["new_titles"]
  198. and not report_data["failed_ids"]
  199. and not ai_content # 有 AI 内容时不返回"暂无匹配"
  200. and not rss_items # 有 RSS 内容时也不返回
  201. and not standalone_data # 有独立展示区数据时也不返回
  202. ):
  203. if mode == "incremental":
  204. mode_text = "增量模式下暂无新增匹配的热点词汇"
  205. elif mode == "current":
  206. mode_text = "当前榜单模式下暂无匹配的热点词汇"
  207. else:
  208. mode_text = "暂无匹配的热点词汇"
  209. simple_content = f"📭 {mode_text}\n\n"
  210. final_content = base_header + simple_content + base_footer
  211. batches.append(final_content)
  212. return batches
  213. # 定义处理热点词汇统计的函数
  def process_stats_section(current_batch, current_batch_has_content, batches, add_separator=True):
      """Append the hot-list statistics region to the batch being built.

      Renders one group per entry of ``report_data["stats"]`` (a header line
      followed by its numbered titles) with markup chosen by ``format_type``.
      Before every append the UTF-8 size of the batch plus ``base_footer`` is
      compared with ``max_bytes``; on overflow the current batch is closed
      with the footer, pushed onto ``batches``, and a new batch is started
      from ``base_header`` and ``stats_header``.  A group header is always
      emitted together with its first title so the two are never separated.

      Args:
          current_batch: Text of the batch currently being built.
          current_batch_has_content: Whether ``current_batch`` already holds
              region content.
          batches: Finished batches; extended in place.
          add_separator: Whether a separator precedes the region header when
              the current batch already has content.

      Returns:
          The updated ``(current_batch, current_batch_has_content, batches)``.
      """
      groups = report_data["stats"]
      if not groups:
          return current_batch, current_batch_has_content, batches

      group_total = len(groups)
      footer_bytes = len(base_footer.encode("utf-8"))
      # display_mode: keyword -> show the source, platform -> show the keyword
      show_source = display_mode == "keyword"
      show_keyword = display_mode == "platform"

      def exceeds_limit(text):
          # The footer is appended when a batch is closed, so reserve room for it.
          return len(text.encode("utf-8")) + footer_bytes >= max_bytes

      def build_group_header(sequence, word, count):
          # Pick the markup pieces for this platform; unknown formats get no header.
          if format_type in ("wework", "bark", "ntfy", "dingtalk"):
              name = f"**{word}**"
              hot_count = warm_count = f"**{count}**"
          elif format_type == "telegram":
              name = f"{word}"
              hot_count = warm_count = f"{count}"
          elif format_type == "feishu":
              sequence = f"<font color='grey'>{sequence}</font>"
              name = f"**{word}**"
              hot_count = f"<font color='red'>{count}</font>"
              warm_count = f"<font color='orange'>{count}</font>"
          elif format_type == "slack":
              name = f"*{word}*"
              hot_count = warm_count = f"*{count}*"
          else:
              return ""
          if count >= 10:
              icon, shown_count = "🔥", hot_count
          elif count >= 5:
              icon, shown_count = "📈", warm_count
          else:
              icon, shown_count = "📌", f"{count}"
          return f"{icon} {sequence} {name} : {shown_count} 条\n\n"

      def render_title(title_data):
          if format_type in ("wework", "bark"):
              platform = "wework"  # bark is formatted with the wework rules
          elif format_type in ("telegram", "ntfy", "feishu", "dingtalk", "slack"):
              platform = format_type
          else:
              return f"{title_data['title']}"
          return format_title_for_platform(
              platform, title_data, show_source=show_source, show_keyword=show_keyword
          )

      # Separator placed between two consecutive groups.
      if format_type in ("wework", "bark"):
          group_gap = "\n\n\n\n"
      elif format_type in ("telegram", "ntfy", "slack"):
          group_gap = "\n\n"
      elif format_type == "feishu":
          group_gap = f"\n{feishu_separator}\n\n"
      elif format_type == "dingtalk":
          group_gap = "\n---\n\n"
      else:
          group_gap = ""

      # Region header, optionally preceded by a separator from the previous region.
      if add_separator and current_batch_has_content:
          if format_type == "feishu":
              region_header = f"\n{feishu_separator}\n\n{stats_header}"
          elif format_type == "dingtalk":
              region_header = f"\n---\n\n{stats_header}"
          elif format_type in ("wework", "bark"):
              region_header = f"\n\n\n\n{stats_header}"
          else:
              region_header = f"\n\n{stats_header}"
      else:
          region_header = stats_header

      candidate = current_batch + region_header
      if exceeds_limit(candidate):
          if current_batch_has_content:
              batches.append(current_batch + base_footer)
          # A fresh batch starts without a leading separator.
          current_batch = base_header + stats_header
      else:
          current_batch = candidate
      current_batch_has_content = True

      for position_in_stats, group in enumerate(groups, start=1):
          word = group["word"]
          count = group["count"]
          group_header = build_group_header(
              f"[{position_in_stats}/{group_total}]", word, count
          )
          titles = group["titles"]

          first_line = ""
          if titles:
              first_line = f" 1. {render_title(titles[0])}\n"
              if len(titles) > 1:
                  first_line += "\n"

          # Atomic step: the group header and its first title move together.
          head_with_first = group_header + first_line
          candidate = current_batch + head_with_first
          if exceeds_limit(candidate):
              if current_batch_has_content:
                  batches.append(current_batch + base_footer)
              current_batch = base_header + stats_header + head_with_first
          else:
              current_batch = candidate
          current_batch_has_content = True

          # Remaining titles of the group, numbered from 2.
          for number, title_data in enumerate(titles[1:], start=2):
              line = f" {number}. {render_title(title_data)}\n"
              if number < len(titles):
                  line += "\n"
              candidate = current_batch + line
              if exceeds_limit(candidate):
                  if current_batch_has_content:
                      batches.append(current_batch + base_footer)
                  # Repeat the group header so the continued batch stays readable.
                  current_batch = base_header + stats_header + group_header + line
              else:
                  current_batch = candidate
              current_batch_has_content = True

          # The gap after a group is added only when it fits; it never forces a flush.
          if position_in_stats < group_total:
              candidate = current_batch + group_gap
              if not exceeds_limit(candidate):
                  current_batch = candidate

      return current_batch, current_batch_has_content, batches
  432. # 定义处理新增新闻的函数
  def process_new_titles_section(current_batch, current_batch_has_content, batches, add_separator=True):
      """Append the "newly added hot news" region to the batch being built.

      Does nothing when ``show_new_section`` is false or
      ``report_data["new_titles"]`` is empty.  Otherwise it writes a region
      header and, for every source, a source header followed by its numbered
      titles.  Before every append the UTF-8 size of the batch plus
      ``base_footer`` is compared with ``max_bytes``; on overflow the current
      batch is closed with the footer, pushed onto ``batches``, and a new
      batch is started.  A source header is always emitted together with its
      first title.

      Changes compared with the previous implementation:
        * every title of a source goes through the same formatter, so "bark"
          uses the wework formatting for all titles and not only the first;
        * a continuation batch starts with the plain region header instead of
          the header that carries the leading region separator.

      Args:
          current_batch: Text of the batch currently being built.
          current_batch_has_content: Whether ``current_batch`` already holds
              region content.
          batches: Finished batches; extended in place.
          add_separator: Whether a separator precedes the region header when
              the current batch already has content.

      Returns:
          The updated ``(current_batch, current_batch_has_content, batches)``.
      """
      if not show_new_section or not report_data["new_titles"]:
          return current_batch, current_batch_has_content, batches

      footer_bytes = len(base_footer.encode("utf-8"))

      def exceeds_limit(text):
          # The footer is appended when a batch is closed, so reserve room for it.
          return len(text.encode("utf-8")) + footer_bytes >= max_bytes

      def render_title(title_data):
          # Format a copy with is_new forced to False so the caller's dict is
          # left untouched (presumably the marker is redundant in this region).
          entry = title_data.copy()
          entry["is_new"] = False
          if format_type in ("wework", "bark"):
              platform = "wework"  # bark is formatted with the wework rules
          elif format_type in ("telegram", "feishu", "dingtalk", "slack"):
              platform = format_type
          else:
              # NOTE(review): "ntfy" lands here and gets the bare title, unlike
              # the statistics region which formats ntfy titles - confirm intent.
              return f"{entry['title']}"
          return format_title_for_platform(platform, entry, show_source=False)

      # Region header without any leading separator; empty for unknown formats.
      if format_type in ("wework", "bark", "ntfy", "feishu", "dingtalk"):
          plain_header = f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
      elif format_type == "telegram":
          plain_header = f"🆕 本次新增热点新闻 (共 {report_data['total_new_count']} 条)\n\n"
      elif format_type == "slack":
          plain_header = f"🆕 *本次新增热点新闻* (共 {report_data['total_new_count']} 条)\n\n"
      else:
          plain_header = ""

      # Separator that detaches this region from the content before it.
      leading_separator = ""
      if add_separator and current_batch_has_content:
          if format_type in ("wework", "bark"):
              leading_separator = "\n\n\n\n"
          elif format_type in ("telegram", "ntfy", "slack"):
              leading_separator = "\n\n"
          elif format_type == "feishu":
              leading_separator = f"\n{feishu_separator}\n\n"
          elif format_type == "dingtalk":
              leading_separator = "\n---\n\n"

      candidate = current_batch + leading_separator + plain_header
      if exceeds_limit(candidate):
          if current_batch_has_content:
              batches.append(current_batch + base_footer)
          # A fresh batch needs no separator directly under the base header.
          current_batch = base_header + plain_header
      else:
          current_batch = candidate
      current_batch_has_content = True

      for source_data in report_data["new_titles"]:
          if format_type in ("wework", "bark", "ntfy", "feishu", "dingtalk"):
              source_header = f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n"
          elif format_type == "telegram":
              source_header = f"{source_data['source_name']} ({len(source_data['titles'])} 条):\n\n"
          elif format_type == "slack":
              source_header = f"*{source_data['source_name']}* ({len(source_data['titles'])} 条):\n\n"
          else:
              source_header = ""

          titles = source_data["titles"]
          first_line = f" 1. {render_title(titles[0])}\n" if titles else ""

          # Atomic step: the source header and its first title move together.
          head_with_first = source_header + first_line
          candidate = current_batch + head_with_first
          if exceeds_limit(candidate):
              if current_batch_has_content:
                  batches.append(current_batch + base_footer)
              current_batch = base_header + plain_header + head_with_first
          else:
              current_batch = candidate
          current_batch_has_content = True

          # Remaining titles of the source, numbered from 2.
          for number, title_data in enumerate(titles[1:], start=2):
              line = f" {number}. {render_title(title_data)}\n"
              candidate = current_batch + line
              if exceeds_limit(candidate):
                  if current_batch_has_content:
                      batches.append(current_batch + base_footer)
                  # Repeat both headers so the continued batch stays readable.
                  current_batch = base_header + plain_header + source_header + line
              else:
                  current_batch = candidate
              current_batch_has_content = True

          # Blank line after each source; appended without a size check.
          current_batch += "\n"

      return current_batch, current_batch_has_content, batches
  583. # 定义处理 AI 分析的函数
  def process_ai_section(current_batch, current_batch_has_content, batches, add_separator=True):
      """Append the pre-rendered AI analysis text to the batch being built.

      ``ai_content`` is only read from the enclosing scope, so no ``nonlocal``
      declaration is needed.  When the text fits into the current batch
      (UTF-8 size plus ``base_footer`` below ``max_bytes``) it is appended,
      preceded by a platform-specific separator if requested.  Otherwise the
      current batch is closed and the text is placed in a new batch right
      after ``base_header``.

      The AI text is never split: if it is larger than ``max_bytes`` on its
      own, the resulting batch exceeds the limit.

      Args:
          current_batch: Text of the batch currently being built.
          current_batch_has_content: Whether ``current_batch`` already holds
              region content.
          batches: Finished batches; extended in place.
          add_separator: Whether a separator precedes the AI text when the
              current batch already has content.

      Returns:
          The updated ``(current_batch, current_batch_has_content, batches)``.
      """
      if not ai_content:
          return current_batch, current_batch_has_content, batches

      # Unknown formats, or a first region, get no separator at all.
      separator = ""
      if add_separator and current_batch_has_content:
          if format_type == "feishu":
              separator = f"\n{feishu_separator}\n\n"
          elif format_type == "dingtalk":
              separator = "\n---\n\n"
          elif format_type in ("wework", "bark"):
              separator = "\n\n\n\n"
          elif format_type in ("telegram", "ntfy", "slack"):
              separator = "\n\n"

      candidate = current_batch + separator + ai_content
      candidate_bytes = len(candidate.encode("utf-8")) + len(base_footer.encode("utf-8"))
      if candidate_bytes < max_bytes:
          return candidate, True, batches

      # Does not fit: close the current batch and start a new one with the AI text.
      if current_batch_has_content:
          batches.append(current_batch + base_footer)
      return base_header + ai_content, True, batches
  619. # 定义处理独立展示区的函数
  def process_standalone_section_wrapper(current_batch, current_batch_has_content, batches, add_separator=True):
      """Delegate the standalone display region to ``_process_standalone_section``.

      Returns the batch state unchanged when ``standalone_data`` is empty;
      otherwise returns whatever the delegate returns, passing it the
      enclosing function's settings together with the current batch state.
      """
      if standalone_data:
          return _process_standalone_section(
              standalone_data,
              format_type,
              feishu_separator,
              base_header,
              base_footer,
              max_bytes,
              current_batch,
              current_batch_has_content,
              batches,
              timezone,
              rank_threshold,
              add_separator,
          )
      return current_batch, current_batch_has_content, batches
  629. # 定义处理 RSS 统计的函数
  def process_rss_stats_wrapper(current_batch, current_batch_has_content, batches, add_separator=True):
      """Delegate the RSS statistics region to ``_process_rss_stats_section``.

      Returns the batch state unchanged when ``rss_items`` is empty; otherwise
      returns whatever the delegate returns, passing it the enclosing
      function's settings together with the current batch state.
      """
      if rss_items:
          return _process_rss_stats_section(
              rss_items,
              format_type,
              feishu_separator,
              base_header,
              base_footer,
              max_bytes,
              current_batch,
              current_batch_has_content,
              batches,
              timezone,
              add_separator,
          )
      return current_batch, current_batch_has_content, batches
  639. # 定义处理 RSS 新增的函数
  640. def process_rss_new_wrapper(current_batch, current_batch_has_content, batches, add_separator=True):
  641. """处理 RSS 新增"""
  642. if not rss_new_items:
  643. return current_batch, current_batch_has_content, batches
  644. return _process_rss_new_titles_section(
  645. rss_new_items, format_type, feishu_separator, base_header, base_footer,
  646. max_bytes, current_batch, current_batch_has_content, batches, timezone,
  647. add_separator
  648. )
  649. # 按 region_order 顺序处理各区域
  650. # 记录是否已有区域内容(用于决定是否添加分割线)
  651. has_region_content = False
  652. for region in region_order:
  653. # 记录处理前的状态,用于判断该区域是否产生了内容
  654. batch_before = current_batch
  655. has_content_before = current_batch_has_content
  656. batches_len_before = len(batches)
  657. # 决定是否需要添加分割线(第一个有内容的区域不需要)
  658. add_separator = has_region_content
  659. if region == "hotlist":
  660. # 处理热榜统计
  661. current_batch, current_batch_has_content, batches = process_stats_section(
  662. current_batch, current_batch_has_content, batches, add_separator
  663. )
  664. elif region == "rss":
  665. # 处理 RSS 统计
  666. current_batch, current_batch_has_content, batches = process_rss_stats_wrapper(
  667. current_batch, current_batch_has_content, batches, add_separator
  668. )
  669. elif region == "new_items":
  670. # 处理热榜新增
  671. current_batch, current_batch_has_content, batches = process_new_titles_section(
  672. current_batch, current_batch_has_content, batches, add_separator
  673. )
  674. # 处理 RSS 新增(跟随 new_items,继承 add_separator 逻辑)
  675. # 如果热榜新增产生了内容,RSS 新增需要分割线
  676. new_batch_changed = (
  677. current_batch != batch_before or
  678. current_batch_has_content != has_content_before or
  679. len(batches) != batches_len_before
  680. )
  681. rss_new_separator = new_batch_changed or has_region_content
  682. current_batch, current_batch_has_content, batches = process_rss_new_wrapper(
  683. current_batch, current_batch_has_content, batches, rss_new_separator
  684. )
  685. elif region == "standalone":
  686. # 处理独立展示区
  687. current_batch, current_batch_has_content, batches = process_standalone_section_wrapper(
  688. current_batch, current_batch_has_content, batches, add_separator
  689. )
  690. elif region == "ai_analysis":
  691. # 处理 AI 分析
  692. current_batch, current_batch_has_content, batches = process_ai_section(
  693. current_batch, current_batch_has_content, batches, add_separator
  694. )
  695. # 检查该区域是否产生了内容
  696. region_produced_content = (
  697. current_batch != batch_before or
  698. current_batch_has_content != has_content_before or
  699. len(batches) != batches_len_before
  700. )
  701. if region_produced_content:
  702. has_region_content = True
  703. if report_data["failed_ids"]:
  704. failed_header = ""
  705. if format_type == "wework":
  706. failed_header = f"\n\n\n\n⚠️ **数据获取失败的平台:**\n\n"
  707. elif format_type == "telegram":
  708. failed_header = f"\n\n⚠️ 数据获取失败的平台:\n\n"
  709. elif format_type == "ntfy":
  710. failed_header = f"\n\n⚠️ **数据获取失败的平台:**\n\n"
  711. elif format_type == "feishu":
  712. failed_header = f"\n{feishu_separator}\n\n⚠️ **数据获取失败的平台:**\n\n"
  713. elif format_type == "dingtalk":
  714. failed_header = f"\n---\n\n⚠️ **数据获取失败的平台:**\n\n"
  715. test_content = current_batch + failed_header
  716. if (
  717. len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
  718. >= max_bytes
  719. ):
  720. if current_batch_has_content:
  721. batches.append(current_batch + base_footer)
  722. current_batch = base_header + failed_header
  723. current_batch_has_content = True
  724. else:
  725. current_batch = test_content
  726. current_batch_has_content = True
  727. for i, id_value in enumerate(report_data["failed_ids"], 1):
  728. if format_type == "feishu":
  729. failed_line = f" • <font color='red'>{id_value}</font>\n"
  730. elif format_type == "dingtalk":
  731. failed_line = f" • **{id_value}**\n"
  732. else:
  733. failed_line = f" • {id_value}\n"
  734. test_content = current_batch + failed_line
  735. if (
  736. len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
  737. >= max_bytes
  738. ):
  739. if current_batch_has_content:
  740. batches.append(current_batch + base_footer)
  741. current_batch = base_header + failed_header + failed_line
  742. current_batch_has_content = True
  743. else:
  744. current_batch = test_content
  745. current_batch_has_content = True
  746. # 完成最后批次
  747. if current_batch_has_content:
  748. batches.append(current_batch + base_footer)
  749. return batches
  750. def _process_rss_stats_section(
  751. rss_stats: list,
  752. format_type: str,
  753. feishu_separator: str,
  754. base_header: str,
  755. base_footer: str,
  756. max_bytes: int,
  757. current_batch: str,
  758. current_batch_has_content: bool,
  759. batches: List[str],
  760. timezone: str = DEFAULT_TIMEZONE,
  761. add_separator: bool = True,
  762. ) -> tuple:
  763. """处理 RSS 统计区块(按关键词分组,与热榜统计格式一致)
  764. Args:
  765. rss_stats: RSS 关键词统计列表,格式与热榜 stats 一致:
  766. [{"word": "AI", "count": 5, "titles": [...]}]
  767. format_type: 格式类型
  768. feishu_separator: 飞书分隔符
  769. base_header: 基础头部
  770. base_footer: 基础尾部
  771. max_bytes: 最大字节数
  772. current_batch: 当前批次内容
  773. current_batch_has_content: 当前批次是否有内容
  774. batches: 已完成的批次列表
  775. timezone: 时区名称
  776. add_separator: 是否在区块前添加分割线(第一个区域时为 False)
  777. Returns:
  778. (current_batch, current_batch_has_content, batches) 元组
  779. """
  780. if not rss_stats:
  781. return current_batch, current_batch_has_content, batches
  782. # 计算总条目数
  783. total_items = sum(stat["count"] for stat in rss_stats)
  784. total_keywords = len(rss_stats)
  785. # RSS 统计区块标题(根据 add_separator 决定是否添加前置分割线)
  786. rss_header = ""
  787. if add_separator and current_batch_has_content:
  788. # 需要添加分割线
  789. if format_type == "feishu":
  790. rss_header = f"\n{feishu_separator}\n\n📰 **RSS 订阅统计** (共 {total_items} 条)\n\n"
  791. elif format_type == "dingtalk":
  792. rss_header = f"\n---\n\n📰 **RSS 订阅统计** (共 {total_items} 条)\n\n"
  793. elif format_type in ("wework", "bark"):
  794. rss_header = f"\n\n\n\n📰 **RSS 订阅统计** (共 {total_items} 条)\n\n"
  795. elif format_type == "telegram":
  796. rss_header = f"\n\n📰 RSS 订阅统计 (共 {total_items} 条)\n\n"
  797. elif format_type == "slack":
  798. rss_header = f"\n\n📰 *RSS 订阅统计* (共 {total_items} 条)\n\n"
  799. else:
  800. rss_header = f"\n\n📰 **RSS 订阅统计** (共 {total_items} 条)\n\n"
  801. else:
  802. # 不需要分割线(第一个区域)
  803. if format_type == "feishu":
  804. rss_header = f"📰 **RSS 订阅统计** (共 {total_items} 条)\n\n"
  805. elif format_type == "dingtalk":
  806. rss_header = f"📰 **RSS 订阅统计** (共 {total_items} 条)\n\n"
  807. elif format_type == "telegram":
  808. rss_header = f"📰 RSS 订阅统计 (共 {total_items} 条)\n\n"
  809. elif format_type == "slack":
  810. rss_header = f"📰 *RSS 订阅统计* (共 {total_items} 条)\n\n"
  811. else:
  812. rss_header = f"📰 **RSS 订阅统计** (共 {total_items} 条)\n\n"
  813. # 添加 RSS 标题
  814. test_content = current_batch + rss_header
  815. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) < max_bytes:
  816. current_batch = test_content
  817. current_batch_has_content = True
  818. else:
  819. if current_batch_has_content:
  820. batches.append(current_batch + base_footer)
  821. current_batch = base_header + rss_header
  822. current_batch_has_content = True
  823. # 逐个处理关键词组(与热榜一致)
  824. for i, stat in enumerate(rss_stats):
  825. word = stat["word"]
  826. count = stat["count"]
  827. sequence_display = f"[{i + 1}/{total_keywords}]"
  828. # 构建关键词标题(与热榜格式一致)
  829. word_header = ""
  830. if format_type in ("wework", "bark"):
  831. if count >= 10:
  832. word_header = f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
  833. elif count >= 5:
  834. word_header = f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
  835. else:
  836. word_header = f"📌 {sequence_display} **{word}** : {count} 条\n\n"
  837. elif format_type == "telegram":
  838. if count >= 10:
  839. word_header = f"🔥 {sequence_display} {word} : {count} 条\n\n"
  840. elif count >= 5:
  841. word_header = f"📈 {sequence_display} {word} : {count} 条\n\n"
  842. else:
  843. word_header = f"📌 {sequence_display} {word} : {count} 条\n\n"
  844. elif format_type == "ntfy":
  845. if count >= 10:
  846. word_header = f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
  847. elif count >= 5:
  848. word_header = f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
  849. else:
  850. word_header = f"📌 {sequence_display} **{word}** : {count} 条\n\n"
  851. elif format_type == "feishu":
  852. if count >= 10:
  853. word_header = f"🔥 <font color='grey'>{sequence_display}</font> **{word}** : <font color='red'>{count}</font> 条\n\n"
  854. elif count >= 5:
  855. word_header = f"📈 <font color='grey'>{sequence_display}</font> **{word}** : <font color='orange'>{count}</font> 条\n\n"
  856. else:
  857. word_header = f"📌 <font color='grey'>{sequence_display}</font> **{word}** : {count} 条\n\n"
  858. elif format_type == "dingtalk":
  859. if count >= 10:
  860. word_header = f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
  861. elif count >= 5:
  862. word_header = f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
  863. else:
  864. word_header = f"📌 {sequence_display} **{word}** : {count} 条\n\n"
  865. elif format_type == "slack":
  866. if count >= 10:
  867. word_header = f"🔥 {sequence_display} *{word}* : *{count}* 条\n\n"
  868. elif count >= 5:
  869. word_header = f"📈 {sequence_display} *{word}* : *{count}* 条\n\n"
  870. else:
  871. word_header = f"📌 {sequence_display} *{word}* : {count} 条\n\n"
  872. # 构建第一条新闻(使用 format_title_for_platform)
  873. first_news_line = ""
  874. if stat["titles"]:
  875. first_title_data = stat["titles"][0]
  876. if format_type in ("wework", "bark"):
  877. formatted_title = format_title_for_platform("wework", first_title_data, show_source=True)
  878. elif format_type == "telegram":
  879. formatted_title = format_title_for_platform("telegram", first_title_data, show_source=True)
  880. elif format_type == "ntfy":
  881. formatted_title = format_title_for_platform("ntfy", first_title_data, show_source=True)
  882. elif format_type == "feishu":
  883. formatted_title = format_title_for_platform("feishu", first_title_data, show_source=True)
  884. elif format_type == "dingtalk":
  885. formatted_title = format_title_for_platform("dingtalk", first_title_data, show_source=True)
  886. elif format_type == "slack":
  887. formatted_title = format_title_for_platform("slack", first_title_data, show_source=True)
  888. else:
  889. formatted_title = f"{first_title_data['title']}"
  890. first_news_line = f" 1. {formatted_title}\n"
  891. if len(stat["titles"]) > 1:
  892. first_news_line += "\n"
  893. # 原子性检查:关键词标题 + 第一条新闻必须一起处理
  894. word_with_first_news = word_header + first_news_line
  895. test_content = current_batch + word_with_first_news
  896. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  897. if current_batch_has_content:
  898. batches.append(current_batch + base_footer)
  899. current_batch = base_header + rss_header + word_with_first_news
  900. current_batch_has_content = True
  901. start_index = 1
  902. else:
  903. current_batch = test_content
  904. current_batch_has_content = True
  905. start_index = 1
  906. # 处理剩余新闻条目
  907. for j in range(start_index, len(stat["titles"])):
  908. title_data = stat["titles"][j]
  909. if format_type in ("wework", "bark"):
  910. formatted_title = format_title_for_platform("wework", title_data, show_source=True)
  911. elif format_type == "telegram":
  912. formatted_title = format_title_for_platform("telegram", title_data, show_source=True)
  913. elif format_type == "ntfy":
  914. formatted_title = format_title_for_platform("ntfy", title_data, show_source=True)
  915. elif format_type == "feishu":
  916. formatted_title = format_title_for_platform("feishu", title_data, show_source=True)
  917. elif format_type == "dingtalk":
  918. formatted_title = format_title_for_platform("dingtalk", title_data, show_source=True)
  919. elif format_type == "slack":
  920. formatted_title = format_title_for_platform("slack", title_data, show_source=True)
  921. else:
  922. formatted_title = f"{title_data['title']}"
  923. news_line = f" {j + 1}. {formatted_title}\n"
  924. if j < len(stat["titles"]) - 1:
  925. news_line += "\n"
  926. test_content = current_batch + news_line
  927. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  928. if current_batch_has_content:
  929. batches.append(current_batch + base_footer)
  930. current_batch = base_header + rss_header + word_header + news_line
  931. current_batch_has_content = True
  932. else:
  933. current_batch = test_content
  934. current_batch_has_content = True
  935. # 关键词间分隔符
  936. if i < len(rss_stats) - 1:
  937. separator = ""
  938. if format_type in ("wework", "bark"):
  939. separator = "\n\n\n\n"
  940. elif format_type == "telegram":
  941. separator = "\n\n"
  942. elif format_type == "ntfy":
  943. separator = "\n\n"
  944. elif format_type == "feishu":
  945. separator = f"\n{feishu_separator}\n\n"
  946. elif format_type == "dingtalk":
  947. separator = "\n---\n\n"
  948. elif format_type == "slack":
  949. separator = "\n\n"
  950. test_content = current_batch + separator
  951. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) < max_bytes:
  952. current_batch = test_content
  953. return current_batch, current_batch_has_content, batches
  954. def _process_rss_new_titles_section(
  955. rss_new_stats: list,
  956. format_type: str,
  957. feishu_separator: str,
  958. base_header: str,
  959. base_footer: str,
  960. max_bytes: int,
  961. current_batch: str,
  962. current_batch_has_content: bool,
  963. batches: List[str],
  964. timezone: str = DEFAULT_TIMEZONE,
  965. add_separator: bool = True,
  966. ) -> tuple:
  967. """处理 RSS 新增区块(按来源分组,与热榜新增格式一致)
  968. Args:
  969. rss_new_stats: RSS 新增关键词统计列表,格式与热榜 stats 一致:
  970. [{"word": "AI", "count": 5, "titles": [...]}]
  971. format_type: 格式类型
  972. feishu_separator: 飞书分隔符
  973. base_header: 基础头部
  974. base_footer: 基础尾部
  975. max_bytes: 最大字节数
  976. current_batch: 当前批次内容
  977. current_batch_has_content: 当前批次是否有内容
  978. batches: 已完成的批次列表
  979. timezone: 时区名称
  980. add_separator: 是否在区块前添加分割线(第一个区域时为 False)
  981. Returns:
  982. (current_batch, current_batch_has_content, batches) 元组
  983. """
  984. if not rss_new_stats:
  985. return current_batch, current_batch_has_content, batches
  986. # 从关键词分组中提取所有条目,重新按来源分组
  987. source_map = {}
  988. for stat in rss_new_stats:
  989. for title_data in stat.get("titles", []):
  990. source_name = title_data.get("source_name", "未知来源")
  991. if source_name not in source_map:
  992. source_map[source_name] = []
  993. source_map[source_name].append(title_data)
  994. if not source_map:
  995. return current_batch, current_batch_has_content, batches
  996. # 计算总条目数
  997. total_items = sum(len(titles) for titles in source_map.values())
  998. # RSS 新增区块标题(根据 add_separator 决定是否添加前置分割线)
  999. new_header = ""
  1000. if add_separator and current_batch_has_content:
  1001. # 需要添加分割线
  1002. if format_type in ("wework", "bark"):
  1003. new_header = f"\n\n\n\n🆕 **RSS 本次新增** (共 {total_items} 条)\n\n"
  1004. elif format_type == "telegram":
  1005. new_header = f"\n\n🆕 RSS 本次新增 (共 {total_items} 条)\n\n"
  1006. elif format_type == "ntfy":
  1007. new_header = f"\n\n🆕 **RSS 本次新增** (共 {total_items} 条)\n\n"
  1008. elif format_type == "feishu":
  1009. new_header = f"\n{feishu_separator}\n\n🆕 **RSS 本次新增** (共 {total_items} 条)\n\n"
  1010. elif format_type == "dingtalk":
  1011. new_header = f"\n---\n\n🆕 **RSS 本次新增** (共 {total_items} 条)\n\n"
  1012. elif format_type == "slack":
  1013. new_header = f"\n\n🆕 *RSS 本次新增* (共 {total_items} 条)\n\n"
  1014. else:
  1015. # 不需要分割线(第一个区域)
  1016. if format_type in ("wework", "bark"):
  1017. new_header = f"🆕 **RSS 本次新增** (共 {total_items} 条)\n\n"
  1018. elif format_type == "telegram":
  1019. new_header = f"🆕 RSS 本次新增 (共 {total_items} 条)\n\n"
  1020. elif format_type == "ntfy":
  1021. new_header = f"🆕 **RSS 本次新增** (共 {total_items} 条)\n\n"
  1022. elif format_type == "feishu":
  1023. new_header = f"🆕 **RSS 本次新增** (共 {total_items} 条)\n\n"
  1024. elif format_type == "dingtalk":
  1025. new_header = f"🆕 **RSS 本次新增** (共 {total_items} 条)\n\n"
  1026. elif format_type == "slack":
  1027. new_header = f"🆕 *RSS 本次新增* (共 {total_items} 条)\n\n"
  1028. # 添加 RSS 新增标题
  1029. test_content = current_batch + new_header
  1030. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  1031. if current_batch_has_content:
  1032. batches.append(current_batch + base_footer)
  1033. current_batch = base_header + new_header
  1034. current_batch_has_content = True
  1035. else:
  1036. current_batch = test_content
  1037. current_batch_has_content = True
  1038. # 按来源分组显示(与热榜新增格式一致)
  1039. source_list = list(source_map.items())
  1040. for i, (source_name, titles) in enumerate(source_list):
  1041. count = len(titles)
  1042. # 构建来源标题(与热榜新增格式一致)
  1043. source_header = ""
  1044. if format_type in ("wework", "bark"):
  1045. source_header = f"**{source_name}** ({count} 条):\n\n"
  1046. elif format_type == "telegram":
  1047. source_header = f"{source_name} ({count} 条):\n\n"
  1048. elif format_type == "ntfy":
  1049. source_header = f"**{source_name}** ({count} 条):\n\n"
  1050. elif format_type == "feishu":
  1051. source_header = f"**{source_name}** ({count} 条):\n\n"
  1052. elif format_type == "dingtalk":
  1053. source_header = f"**{source_name}** ({count} 条):\n\n"
  1054. elif format_type == "slack":
  1055. source_header = f"*{source_name}* ({count} 条):\n\n"
  1056. # 构建第一条新闻(不显示来源,禁用 new emoji)
  1057. first_news_line = ""
  1058. if titles:
  1059. first_title_data = titles[0].copy()
  1060. first_title_data["is_new"] = False
  1061. if format_type in ("wework", "bark"):
  1062. formatted_title = format_title_for_platform("wework", first_title_data, show_source=False)
  1063. elif format_type == "telegram":
  1064. formatted_title = format_title_for_platform("telegram", first_title_data, show_source=False)
  1065. elif format_type == "ntfy":
  1066. formatted_title = format_title_for_platform("ntfy", first_title_data, show_source=False)
  1067. elif format_type == "feishu":
  1068. formatted_title = format_title_for_platform("feishu", first_title_data, show_source=False)
  1069. elif format_type == "dingtalk":
  1070. formatted_title = format_title_for_platform("dingtalk", first_title_data, show_source=False)
  1071. elif format_type == "slack":
  1072. formatted_title = format_title_for_platform("slack", first_title_data, show_source=False)
  1073. else:
  1074. formatted_title = f"{first_title_data['title']}"
  1075. first_news_line = f" 1. {formatted_title}\n"
  1076. # 原子性检查:来源标题 + 第一条新闻必须一起处理
  1077. source_with_first_news = source_header + first_news_line
  1078. test_content = current_batch + source_with_first_news
  1079. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  1080. if current_batch_has_content:
  1081. batches.append(current_batch + base_footer)
  1082. current_batch = base_header + new_header + source_with_first_news
  1083. current_batch_has_content = True
  1084. start_index = 1
  1085. else:
  1086. current_batch = test_content
  1087. current_batch_has_content = True
  1088. start_index = 1
  1089. # 处理剩余新闻条目(禁用 new emoji)
  1090. for j in range(start_index, len(titles)):
  1091. title_data = titles[j].copy()
  1092. title_data["is_new"] = False
  1093. if format_type in ("wework", "bark"):
  1094. formatted_title = format_title_for_platform("wework", title_data, show_source=False)
  1095. elif format_type == "telegram":
  1096. formatted_title = format_title_for_platform("telegram", title_data, show_source=False)
  1097. elif format_type == "ntfy":
  1098. formatted_title = format_title_for_platform("ntfy", title_data, show_source=False)
  1099. elif format_type == "feishu":
  1100. formatted_title = format_title_for_platform("feishu", title_data, show_source=False)
  1101. elif format_type == "dingtalk":
  1102. formatted_title = format_title_for_platform("dingtalk", title_data, show_source=False)
  1103. elif format_type == "slack":
  1104. formatted_title = format_title_for_platform("slack", title_data, show_source=False)
  1105. else:
  1106. formatted_title = f"{title_data['title']}"
  1107. news_line = f" {j + 1}. {formatted_title}\n"
  1108. test_content = current_batch + news_line
  1109. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  1110. if current_batch_has_content:
  1111. batches.append(current_batch + base_footer)
  1112. current_batch = base_header + new_header + source_header + news_line
  1113. current_batch_has_content = True
  1114. else:
  1115. current_batch = test_content
  1116. current_batch_has_content = True
  1117. # 来源间添加空行(与热榜新增格式一致)
  1118. current_batch += "\n"
  1119. return current_batch, current_batch_has_content, batches
  1120. def _format_rss_item_line(
  1121. item: Dict,
  1122. index: int,
  1123. format_type: str,
  1124. timezone: str = DEFAULT_TIMEZONE,
  1125. ) -> str:
  1126. """格式化单条 RSS 条目
  1127. Args:
  1128. item: RSS 条目字典
  1129. index: 序号
  1130. format_type: 格式类型
  1131. timezone: 时区名称
  1132. Returns:
  1133. 格式化后的条目行字符串
  1134. """
  1135. title = item.get("title", "")
  1136. url = item.get("url", "")
  1137. published_at = item.get("published_at", "")
  1138. # 使用友好时间格式
  1139. if published_at:
  1140. friendly_time = format_iso_time_friendly(published_at, timezone, include_date=True)
  1141. else:
  1142. friendly_time = ""
  1143. # 构建条目行
  1144. if format_type == "feishu":
  1145. if url:
  1146. item_line = f" {index}. [{title}]({url})"
  1147. else:
  1148. item_line = f" {index}. {title}"
  1149. if friendly_time:
  1150. item_line += f" <font color='grey'>- {friendly_time}</font>"
  1151. elif format_type == "telegram":
  1152. if url:
  1153. item_line = f" {index}. {title} ({url})"
  1154. else:
  1155. item_line = f" {index}. {title}"
  1156. if friendly_time:
  1157. item_line += f" - {friendly_time}"
  1158. else:
  1159. if url:
  1160. item_line = f" {index}. [{title}]({url})"
  1161. else:
  1162. item_line = f" {index}. {title}"
  1163. if friendly_time:
  1164. item_line += f" `{friendly_time}`"
  1165. item_line += "\n"
  1166. return item_line
  1167. def _process_standalone_section(
  1168. standalone_data: Dict,
  1169. format_type: str,
  1170. feishu_separator: str,
  1171. base_header: str,
  1172. base_footer: str,
  1173. max_bytes: int,
  1174. current_batch: str,
  1175. current_batch_has_content: bool,
  1176. batches: List[str],
  1177. timezone: str = DEFAULT_TIMEZONE,
  1178. rank_threshold: int = 10,
  1179. add_separator: bool = True,
  1180. ) -> tuple:
  1181. """处理独立展示区区块
  1182. 独立展示区显示指定平台的完整热榜或 RSS 源内容,不受关键词过滤影响。
  1183. 热榜按原始排名排序,RSS 按发布时间排序。
  1184. Args:
  1185. standalone_data: 独立展示数据,格式:
  1186. {
  1187. "platforms": [{"id": "zhihu", "name": "知乎热榜", "items": [...]}],
  1188. "rss_feeds": [{"id": "hacker-news", "name": "Hacker News", "items": [...]}]
  1189. }
  1190. format_type: 格式类型
  1191. feishu_separator: 飞书分隔符
  1192. base_header: 基础头部
  1193. base_footer: 基础尾部
  1194. max_bytes: 最大字节数
  1195. current_batch: 当前批次内容
  1196. current_batch_has_content: 当前批次是否有内容
  1197. batches: 已完成的批次列表
  1198. timezone: 时区名称
  1199. rank_threshold: 排名高亮阈值
  1200. add_separator: 是否在区块前添加分割线(第一个区域时为 False)
  1201. Returns:
  1202. (current_batch, current_batch_has_content, batches) 元组
  1203. """
  1204. if not standalone_data:
  1205. return current_batch, current_batch_has_content, batches
  1206. platforms = standalone_data.get("platforms", [])
  1207. rss_feeds = standalone_data.get("rss_feeds", [])
  1208. if not platforms and not rss_feeds:
  1209. return current_batch, current_batch_has_content, batches
  1210. # 计算总条目数
  1211. total_platform_items = sum(len(p.get("items", [])) for p in platforms)
  1212. total_rss_items = sum(len(f.get("items", [])) for f in rss_feeds)
  1213. total_items = total_platform_items + total_rss_items
  1214. # 独立展示区标题(根据 add_separator 决定是否添加前置分割线)
  1215. section_header = ""
  1216. if add_separator and current_batch_has_content:
  1217. # 需要添加分割线
  1218. if format_type == "feishu":
  1219. section_header = f"\n{feishu_separator}\n\n📋 **独立展示区** (共 {total_items} 条)\n\n"
  1220. elif format_type == "dingtalk":
  1221. section_header = f"\n---\n\n📋 **独立展示区** (共 {total_items} 条)\n\n"
  1222. elif format_type in ("wework", "bark"):
  1223. section_header = f"\n\n\n\n📋 **独立展示区** (共 {total_items} 条)\n\n"
  1224. elif format_type == "telegram":
  1225. section_header = f"\n\n📋 独立展示区 (共 {total_items} 条)\n\n"
  1226. elif format_type == "slack":
  1227. section_header = f"\n\n📋 *独立展示区* (共 {total_items} 条)\n\n"
  1228. else:
  1229. section_header = f"\n\n📋 **独立展示区** (共 {total_items} 条)\n\n"
  1230. else:
  1231. # 不需要分割线(第一个区域)
  1232. if format_type == "feishu":
  1233. section_header = f"📋 **独立展示区** (共 {total_items} 条)\n\n"
  1234. elif format_type == "dingtalk":
  1235. section_header = f"📋 **独立展示区** (共 {total_items} 条)\n\n"
  1236. elif format_type == "telegram":
  1237. section_header = f"📋 独立展示区 (共 {total_items} 条)\n\n"
  1238. elif format_type == "slack":
  1239. section_header = f"📋 *独立展示区* (共 {total_items} 条)\n\n"
  1240. else:
  1241. section_header = f"📋 **独立展示区** (共 {total_items} 条)\n\n"
  1242. # 添加区块标题
  1243. test_content = current_batch + section_header
  1244. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) < max_bytes:
  1245. current_batch = test_content
  1246. current_batch_has_content = True
  1247. else:
  1248. if current_batch_has_content:
  1249. batches.append(current_batch + base_footer)
  1250. current_batch = base_header + section_header
  1251. current_batch_has_content = True
  1252. # 处理热榜平台
  1253. for platform in platforms:
  1254. platform_name = platform.get("name", platform.get("id", ""))
  1255. items = platform.get("items", [])
  1256. if not items:
  1257. continue
  1258. # 平台标题
  1259. platform_header = ""
  1260. if format_type in ("wework", "bark"):
  1261. platform_header = f"**{platform_name}** ({len(items)} 条):\n\n"
  1262. elif format_type == "telegram":
  1263. platform_header = f"{platform_name} ({len(items)} 条):\n\n"
  1264. elif format_type == "ntfy":
  1265. platform_header = f"**{platform_name}** ({len(items)} 条):\n\n"
  1266. elif format_type == "feishu":
  1267. platform_header = f"**{platform_name}** ({len(items)} 条):\n\n"
  1268. elif format_type == "dingtalk":
  1269. platform_header = f"**{platform_name}** ({len(items)} 条):\n\n"
  1270. elif format_type == "slack":
  1271. platform_header = f"*{platform_name}* ({len(items)} 条):\n\n"
  1272. # 构建第一条新闻
  1273. first_item_line = ""
  1274. if items:
  1275. first_item_line = _format_standalone_platform_item(items[0], 1, format_type, rank_threshold)
  1276. # 原子性检查
  1277. platform_with_first = platform_header + first_item_line
  1278. test_content = current_batch + platform_with_first
  1279. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  1280. if current_batch_has_content:
  1281. batches.append(current_batch + base_footer)
  1282. current_batch = base_header + section_header + platform_with_first
  1283. current_batch_has_content = True
  1284. start_index = 1
  1285. else:
  1286. current_batch = test_content
  1287. current_batch_has_content = True
  1288. start_index = 1
  1289. # 处理剩余条目
  1290. for j in range(start_index, len(items)):
  1291. item_line = _format_standalone_platform_item(items[j], j + 1, format_type, rank_threshold)
  1292. test_content = current_batch + item_line
  1293. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  1294. if current_batch_has_content:
  1295. batches.append(current_batch + base_footer)
  1296. current_batch = base_header + section_header + platform_header + item_line
  1297. current_batch_has_content = True
  1298. else:
  1299. current_batch = test_content
  1300. current_batch_has_content = True
  1301. current_batch += "\n"
  1302. # 处理 RSS 源
  1303. for feed in rss_feeds:
  1304. feed_name = feed.get("name", feed.get("id", ""))
  1305. items = feed.get("items", [])
  1306. if not items:
  1307. continue
  1308. # RSS 源标题
  1309. feed_header = ""
  1310. if format_type in ("wework", "bark"):
  1311. feed_header = f"**{feed_name}** ({len(items)} 条):\n\n"
  1312. elif format_type == "telegram":
  1313. feed_header = f"{feed_name} ({len(items)} 条):\n\n"
  1314. elif format_type == "ntfy":
  1315. feed_header = f"**{feed_name}** ({len(items)} 条):\n\n"
  1316. elif format_type == "feishu":
  1317. feed_header = f"**{feed_name}** ({len(items)} 条):\n\n"
  1318. elif format_type == "dingtalk":
  1319. feed_header = f"**{feed_name}** ({len(items)} 条):\n\n"
  1320. elif format_type == "slack":
  1321. feed_header = f"*{feed_name}* ({len(items)} 条):\n\n"
  1322. # 构建第一条 RSS
  1323. first_item_line = ""
  1324. if items:
  1325. first_item_line = _format_standalone_rss_item(items[0], 1, format_type, timezone)
  1326. # 原子性检查
  1327. feed_with_first = feed_header + first_item_line
  1328. test_content = current_batch + feed_with_first
  1329. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  1330. if current_batch_has_content:
  1331. batches.append(current_batch + base_footer)
  1332. current_batch = base_header + section_header + feed_with_first
  1333. current_batch_has_content = True
  1334. start_index = 1
  1335. else:
  1336. current_batch = test_content
  1337. current_batch_has_content = True
  1338. start_index = 1
  1339. # 处理剩余条目
  1340. for j in range(start_index, len(items)):
  1341. item_line = _format_standalone_rss_item(items[j], j + 1, format_type, timezone)
  1342. test_content = current_batch + item_line
  1343. if len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8")) >= max_bytes:
  1344. if current_batch_has_content:
  1345. batches.append(current_batch + base_footer)
  1346. current_batch = base_header + section_header + feed_header + item_line
  1347. current_batch_has_content = True
  1348. else:
  1349. current_batch = test_content
  1350. current_batch_has_content = True
  1351. current_batch += "\n"
  1352. return current_batch, current_batch_has_content, batches
  1353. def _format_standalone_platform_item(item: Dict, index: int, format_type: str, rank_threshold: int = 10) -> str:
  1354. """格式化独立展示区的热榜条目(复用热点词汇统计区样式)
  1355. Args:
  1356. item: 热榜条目,包含 title, url, rank, ranks, first_time, last_time, count
  1357. index: 序号
  1358. format_type: 格式类型
  1359. rank_threshold: 排名高亮阈值
  1360. Returns:
  1361. 格式化后的条目行字符串
  1362. """
  1363. title = item.get("title", "")
  1364. url = item.get("url", "") or item.get("mobileUrl", "")
  1365. ranks = item.get("ranks", [])
  1366. rank = item.get("rank", 0)
  1367. first_time = item.get("first_time", "")
  1368. last_time = item.get("last_time", "")
  1369. count = item.get("count", 1)
  1370. # 使用 format_rank_display 格式化排名(复用热点词汇统计区逻辑)
  1371. # 如果没有 ranks 列表,用单个 rank 构造
  1372. if not ranks and rank > 0:
  1373. ranks = [rank]
  1374. rank_display = format_rank_display(ranks, rank_threshold, format_type) if ranks else ""
  1375. # 构建时间显示(用 ~ 连接范围,与热点词汇统计区一致)
  1376. # 将 HH-MM 格式转换为 HH:MM 格式
  1377. time_display = ""
  1378. if first_time and last_time and first_time != last_time:
  1379. first_time_display = convert_time_for_display(first_time)
  1380. last_time_display = convert_time_for_display(last_time)
  1381. time_display = f"{first_time_display}~{last_time_display}"
  1382. elif first_time:
  1383. time_display = convert_time_for_display(first_time)
  1384. # 构建次数显示(格式为 (N次),与热点词汇统计区一致)
  1385. count_display = f"({count}次)" if count > 1 else ""
  1386. # 根据格式类型构建条目行(复用热点词汇统计区样式)
  1387. if format_type == "feishu":
  1388. if url:
  1389. item_line = f" {index}. [{title}]({url})"
  1390. else:
  1391. item_line = f" {index}. {title}"
  1392. if rank_display:
  1393. item_line += f" {rank_display}"
  1394. if time_display:
  1395. item_line += f" <font color='grey'>- {time_display}</font>"
  1396. if count_display:
  1397. item_line += f" <font color='green'>{count_display}</font>"
  1398. elif format_type == "dingtalk":
  1399. if url:
  1400. item_line = f" {index}. [{title}]({url})"
  1401. else:
  1402. item_line = f" {index}. {title}"
  1403. if rank_display:
  1404. item_line += f" {rank_display}"
  1405. if time_display:
  1406. item_line += f" - {time_display}"
  1407. if count_display:
  1408. item_line += f" {count_display}"
  1409. elif format_type == "telegram":
  1410. if url:
  1411. item_line = f" {index}. {title} ({url})"
  1412. else:
  1413. item_line = f" {index}. {title}"
  1414. if rank_display:
  1415. item_line += f" {rank_display}"
  1416. if time_display:
  1417. item_line += f" - {time_display}"
  1418. if count_display:
  1419. item_line += f" {count_display}"
  1420. elif format_type == "slack":
  1421. if url:
  1422. item_line = f" {index}. <{url}|{title}>"
  1423. else:
  1424. item_line = f" {index}. {title}"
  1425. if rank_display:
  1426. item_line += f" {rank_display}"
  1427. if time_display:
  1428. item_line += f" _{time_display}_"
  1429. if count_display:
  1430. item_line += f" {count_display}"
  1431. else:
  1432. # wework, bark, ntfy
  1433. if url:
  1434. item_line = f" {index}. [{title}]({url})"
  1435. else:
  1436. item_line = f" {index}. {title}"
  1437. if rank_display:
  1438. item_line += f" {rank_display}"
  1439. if time_display:
  1440. item_line += f" - {time_display}"
  1441. if count_display:
  1442. item_line += f" {count_display}"
  1443. item_line += "\n"
  1444. return item_line
  1445. def _format_standalone_rss_item(
  1446. item: Dict, index: int, format_type: str, timezone: str = "Asia/Shanghai"
  1447. ) -> str:
  1448. """格式化独立展示区的 RSS 条目
  1449. Args:
  1450. item: RSS 条目,包含 title, url, published_at, author
  1451. index: 序号
  1452. format_type: 格式类型
  1453. timezone: 时区名称
  1454. Returns:
  1455. 格式化后的条目行字符串
  1456. """
  1457. title = item.get("title", "")
  1458. url = item.get("url", "")
  1459. published_at = item.get("published_at", "")
  1460. author = item.get("author", "")
  1461. # 使用友好时间格式
  1462. friendly_time = ""
  1463. if published_at:
  1464. friendly_time = format_iso_time_friendly(published_at, timezone, include_date=True)
  1465. # 构建元信息
  1466. meta_parts = []
  1467. if friendly_time:
  1468. meta_parts.append(friendly_time)
  1469. if author:
  1470. meta_parts.append(author)
  1471. meta_str = ", ".join(meta_parts)
  1472. # 根据格式类型构建条目行
  1473. if format_type == "feishu":
  1474. if url:
  1475. item_line = f" {index}. [{title}]({url})"
  1476. else:
  1477. item_line = f" {index}. {title}"
  1478. if meta_str:
  1479. item_line += f" <font color='grey'>- {meta_str}</font>"
  1480. elif format_type == "telegram":
  1481. if url:
  1482. item_line = f" {index}. {title} ({url})"
  1483. else:
  1484. item_line = f" {index}. {title}"
  1485. if meta_str:
  1486. item_line += f" - {meta_str}"
  1487. elif format_type == "slack":
  1488. if url:
  1489. item_line = f" {index}. <{url}|{title}>"
  1490. else:
  1491. item_line = f" {index}. {title}"
  1492. if meta_str:
  1493. item_line += f" _{meta_str}_"
  1494. else:
  1495. # wework, bark, ntfy, dingtalk
  1496. if url:
  1497. item_line = f" {index}. [{title}]({url})"
  1498. else:
  1499. item_line = f" {index}. {title}"
  1500. if meta_str:
  1501. item_line += f" `{meta_str}`"
  1502. item_line += "\n"
  1503. return item_line