formatter.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. # coding=utf-8
  2. """
  3. AI 分析结果格式化模块
  4. 将 AI 分析结果格式化为各推送渠道的样式
  5. """
  6. import html as html_lib
  7. import re
  8. from .analyzer import AIAnalysisResult
  9. def _escape_html(text: str) -> str:
  10. """转义 HTML 特殊字符,防止 XSS 攻击"""
  11. return html_lib.escape(text) if text else ""
  12. def _format_list_content(text: str) -> str:
  13. """
  14. 格式化列表内容,确保序号前有换行
  15. 例如将 "1. xxx 2. yyy" 转换为:
  16. 1. xxx
  17. 2. yyy
  18. """
  19. if not text:
  20. return ""
  21. # 去除首尾空白,防止 AI 返回的内容开头就有换行导致显示空行
  22. text = text.strip()
  23. # 0. 合并序号与紧随的【标签】(防御性处理)
  24. # 将 "1.\n【投资者】:" 或 "1. 【投资者】:" 合并为 "1. 投资者:"
  25. text = re.sub(r'(\d+\.)\s*【([^】]+)】([::]?)', r'\1 \2:', text)
  26. # 1. 规范化:确保 "1." 后面有空格
  27. result = re.sub(r'(\d+)\.([^ \d])', r'\1. \2', text)
  28. # 2. 强制换行:匹配 "数字.",且前面不是换行符
  29. # (?!\d) 排除版本号/小数(如 2.0、3.5),避免将其误判为列表序号
  30. result = re.sub(r'(?<=[^\n])\s+(\d+\.)(?!\d)', r'\n\1', result)
  31. # 3. 处理 "1.**粗体**" 这种情况(虽然 Prompt 要求不输出 Markdown,但防御性处理)
  32. result = re.sub(r'(?<=[^\n])(\d+\.\*\*)', r'\n\1', result)
  33. # 4. 处理中文标点后的换行(排除版本号/小数)
  34. result = re.sub(r'([::;,。;,])\s*(\d+\.)(?!\d)', r'\1\n\2', result)
  35. # 5. 处理 "XX方面:"、"XX领域:" 等子标题换行
  36. # 只有在中文标点(句号、逗号、分号等)后才触发换行,避免破坏 "1. XX领域:" 格式
  37. result = re.sub(r'([。!?;,、])\s*([a-zA-Z0-9\u4e00-\u9fa5]+(方面|领域)[::])', r'\1\n\2', result)
  38. # 6. 处理 【标签】 格式
  39. # 6a. 标签前确保空行分隔(文本开头除外)
  40. result = re.sub(r'(?<=\S)\n*(【[^】]+】)', r'\n\n\1', result)
  41. # 6b. 合并标签与被换行拆开的冒号:【tag】\n: → 【tag】:
  42. result = re.sub(r'(【[^】]+】)\n+([::])', r'\1\2', result)
  43. # 6c. 标签后(含可选冒号),如果紧跟非空白非冒号内容则另起一行
  44. # 用 (?=[^\s::]) 避免正则回溯将冒号误判为"内容"而拆开 【tag】:
  45. result = re.sub(r'(【[^】]+】[::]?)[ \t]*(?=[^\s::])', r'\1\n', result)
  46. # 7. 在列表项之间增加视觉空行(排除版本号/小数)
  47. # 排除 【标签】 行(以】结尾)和子标题行(以冒号结尾)之后的情况,避免标题与首项之间出现空行
  48. result = re.sub(r'(?<![::】])\n(\d+\.)(?!\d)', r'\n\n\1', result)
  49. return result
  50. def _format_standalone_summaries(summaries: dict) -> str:
  51. """格式化独立展示区概括为纯文本行,每个源名称单独一行"""
  52. if not summaries:
  53. return ""
  54. lines = []
  55. for source_name, summary in summaries.items():
  56. if summary:
  57. lines.append(f"[{source_name}]:\n{summary}")
  58. return "\n\n".join(lines)
  59. def render_ai_analysis_markdown(result: AIAnalysisResult) -> str:
  60. """渲染为通用 Markdown 格式(Telegram、企业微信、ntfy、Bark、Slack)"""
  61. if not result.success:
  62. return f"⚠️ AI 分析失败: {result.error}"
  63. lines = ["**✨ AI 热点分析**", ""]
  64. if result.core_trends:
  65. lines.extend(["**核心热点态势**", _format_list_content(result.core_trends), ""])
  66. if result.sentiment_controversy:
  67. lines.extend(
  68. ["**舆论风向争议**", _format_list_content(result.sentiment_controversy), ""]
  69. )
  70. if result.signals:
  71. lines.extend(["**异动与弱信号**", _format_list_content(result.signals), ""])
  72. if result.rss_insights:
  73. lines.extend(
  74. ["**RSS 深度洞察**", _format_list_content(result.rss_insights), ""]
  75. )
  76. if result.outlook_strategy:
  77. lines.extend(
  78. ["**研判策略建议**", _format_list_content(result.outlook_strategy), ""]
  79. )
  80. if result.standalone_summaries:
  81. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  82. if summaries_text:
  83. lines.extend(["**独立源点速览**", summaries_text])
  84. return "\n".join(lines)
  85. def render_ai_analysis_feishu(result: AIAnalysisResult) -> str:
  86. """渲染为飞书卡片 Markdown 格式"""
  87. if not result.success:
  88. return f"⚠️ AI 分析失败: {result.error}"
  89. lines = ["**✨ AI 热点分析**", ""]
  90. if result.core_trends:
  91. lines.extend(["**核心热点态势**", _format_list_content(result.core_trends), ""])
  92. if result.sentiment_controversy:
  93. lines.extend(
  94. ["**舆论风向争议**", _format_list_content(result.sentiment_controversy), ""]
  95. )
  96. if result.signals:
  97. lines.extend(["**异动与弱信号**", _format_list_content(result.signals), ""])
  98. if result.rss_insights:
  99. lines.extend(
  100. ["**RSS 深度洞察**", _format_list_content(result.rss_insights), ""]
  101. )
  102. if result.outlook_strategy:
  103. lines.extend(
  104. ["**研判策略建议**", _format_list_content(result.outlook_strategy), ""]
  105. )
  106. if result.standalone_summaries:
  107. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  108. if summaries_text:
  109. lines.extend(["**独立源点速览**", summaries_text])
  110. return "\n".join(lines)
  111. def render_ai_analysis_dingtalk(result: AIAnalysisResult) -> str:
  112. """渲染为钉钉 Markdown 格式"""
  113. if not result.success:
  114. return f"⚠️ AI 分析失败: {result.error}"
  115. lines = ["### ✨ AI 热点分析", ""]
  116. if result.core_trends:
  117. lines.extend(
  118. ["#### 核心热点态势", _format_list_content(result.core_trends), ""]
  119. )
  120. if result.sentiment_controversy:
  121. lines.extend(
  122. [
  123. "#### 舆论风向争议",
  124. _format_list_content(result.sentiment_controversy),
  125. "",
  126. ]
  127. )
  128. if result.signals:
  129. lines.extend(["#### 异动与弱信号", _format_list_content(result.signals), ""])
  130. if result.rss_insights:
  131. lines.extend(
  132. ["#### RSS 深度洞察", _format_list_content(result.rss_insights), ""]
  133. )
  134. if result.outlook_strategy:
  135. lines.extend(
  136. ["#### 研判策略建议", _format_list_content(result.outlook_strategy), ""]
  137. )
  138. if result.standalone_summaries:
  139. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  140. if summaries_text:
  141. lines.extend(["#### 独立源点速览", summaries_text])
  142. return "\n".join(lines)
  143. def render_ai_analysis_html(result: AIAnalysisResult) -> str:
  144. """渲染为 HTML 格式(邮件)"""
  145. if not result.success:
  146. return (
  147. f'<div class="ai-error">⚠️ AI 分析失败: {_escape_html(result.error)}</div>'
  148. )
  149. html_parts = ['<div class="ai-analysis">', "<h3>✨ AI 热点分析</h3>"]
  150. if result.core_trends:
  151. content = _format_list_content(result.core_trends)
  152. content_html = _escape_html(content).replace("\n", "<br>")
  153. html_parts.extend(
  154. [
  155. '<div class="ai-section">',
  156. "<h4>核心热点态势</h4>",
  157. f'<div class="ai-content">{content_html}</div>',
  158. "</div>",
  159. ]
  160. )
  161. if result.sentiment_controversy:
  162. content = _format_list_content(result.sentiment_controversy)
  163. content_html = _escape_html(content).replace("\n", "<br>")
  164. html_parts.extend(
  165. [
  166. '<div class="ai-section">',
  167. "<h4>舆论风向争议</h4>",
  168. f'<div class="ai-content">{content_html}</div>',
  169. "</div>",
  170. ]
  171. )
  172. if result.signals:
  173. content = _format_list_content(result.signals)
  174. content_html = _escape_html(content).replace("\n", "<br>")
  175. html_parts.extend(
  176. [
  177. '<div class="ai-section">',
  178. "<h4>异动与弱信号</h4>",
  179. f'<div class="ai-content">{content_html}</div>',
  180. "</div>",
  181. ]
  182. )
  183. if result.rss_insights:
  184. content = _format_list_content(result.rss_insights)
  185. content_html = _escape_html(content).replace("\n", "<br>")
  186. html_parts.extend(
  187. [
  188. '<div class="ai-section">',
  189. "<h4>RSS 深度洞察</h4>",
  190. f'<div class="ai-content">{content_html}</div>',
  191. "</div>",
  192. ]
  193. )
  194. if result.outlook_strategy:
  195. content = _format_list_content(result.outlook_strategy)
  196. content_html = _escape_html(content).replace("\n", "<br>")
  197. html_parts.extend(
  198. [
  199. '<div class="ai-section ai-conclusion">',
  200. "<h4>研判策略建议</h4>",
  201. f'<div class="ai-content">{content_html}</div>',
  202. "</div>",
  203. ]
  204. )
  205. if result.standalone_summaries:
  206. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  207. if summaries_text:
  208. summaries_html = _escape_html(summaries_text).replace("\n", "<br>")
  209. html_parts.extend(
  210. [
  211. '<div class="ai-section">',
  212. "<h4>独立源点速览</h4>",
  213. f'<div class="ai-content">{summaries_html}</div>',
  214. "</div>",
  215. ]
  216. )
  217. html_parts.append("</div>")
  218. return "\n".join(html_parts)
  219. def render_ai_analysis_plain(result: AIAnalysisResult) -> str:
  220. """渲染为纯文本格式"""
  221. if not result.success:
  222. return f"AI 分析失败: {result.error}"
  223. lines = ["【✨ AI 热点分析】", ""]
  224. if result.core_trends:
  225. lines.extend(["[核心热点态势]", _format_list_content(result.core_trends), ""])
  226. if result.sentiment_controversy:
  227. lines.extend(
  228. ["[舆论风向争议]", _format_list_content(result.sentiment_controversy), ""]
  229. )
  230. if result.signals:
  231. lines.extend(["[异动与弱信号]", _format_list_content(result.signals), ""])
  232. if result.rss_insights:
  233. lines.extend(["[RSS 深度洞察]", _format_list_content(result.rss_insights), ""])
  234. if result.outlook_strategy:
  235. lines.extend(["[研判策略建议]", _format_list_content(result.outlook_strategy), ""])
  236. if result.standalone_summaries:
  237. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  238. if summaries_text:
  239. lines.extend(["[独立源点速览]", summaries_text])
  240. return "\n".join(lines)
  241. def render_ai_analysis_telegram(result: AIAnalysisResult) -> str:
  242. """渲染为 Telegram HTML 格式(配合 parse_mode: HTML)
  243. Telegram Bot API 的 HTML 模式仅支持有限标签:
  244. <b>, <i>, <u>, <s>, <code>, <pre>, <a href="">, <blockquote>
  245. 换行直接使用 \\n,不支持 <br>, <div>, <h1>-<h6> 等标签。
  246. """
  247. if not result.success:
  248. return f"⚠️ AI 分析失败: {_escape_html(result.error)}"
  249. lines = ["<b>✨ AI 热点分析</b>", ""]
  250. if result.core_trends:
  251. lines.extend(["<b>核心热点态势</b>", _escape_html(_format_list_content(result.core_trends)), ""])
  252. if result.sentiment_controversy:
  253. lines.extend(["<b>舆论风向争议</b>", _escape_html(_format_list_content(result.sentiment_controversy)), ""])
  254. if result.signals:
  255. lines.extend(["<b>异动与弱信号</b>", _escape_html(_format_list_content(result.signals)), ""])
  256. if result.rss_insights:
  257. lines.extend(["<b>RSS 深度洞察</b>", _escape_html(_format_list_content(result.rss_insights)), ""])
  258. if result.outlook_strategy:
  259. lines.extend(["<b>研判策略建议</b>", _escape_html(_format_list_content(result.outlook_strategy)), ""])
  260. if result.standalone_summaries:
  261. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  262. if summaries_text:
  263. lines.extend(["<b>独立源点速览</b>", _escape_html(summaries_text)])
  264. return "\n".join(lines)
  265. def get_ai_analysis_renderer(channel: str):
  266. """根据渠道获取对应的渲染函数"""
  267. renderers = {
  268. "feishu": render_ai_analysis_feishu,
  269. "dingtalk": render_ai_analysis_dingtalk,
  270. "wework": render_ai_analysis_markdown,
  271. "telegram": render_ai_analysis_telegram,
  272. "email": render_ai_analysis_html_rich, # 邮件使用丰富样式,配合 HTML 报告的 CSS
  273. "ntfy": render_ai_analysis_markdown,
  274. "bark": render_ai_analysis_plain,
  275. "slack": render_ai_analysis_markdown,
  276. }
  277. return renderers.get(channel, render_ai_analysis_markdown)
  278. def render_ai_analysis_html_rich(result: AIAnalysisResult) -> str:
  279. """渲染为丰富样式的 HTML 格式(HTML 报告用)"""
  280. if not result:
  281. return ""
  282. # 检查是否成功
  283. if not result.success:
  284. error_msg = result.error or "未知错误"
  285. return f"""
  286. <div class="ai-section">
  287. <div class="ai-error">⚠️ AI 分析失败: {_escape_html(str(error_msg))}</div>
  288. </div>"""
  289. ai_html = """
  290. <div class="ai-section">
  291. <div class="ai-section-header">
  292. <div class="ai-section-title">✨ AI 热点分析</div>
  293. <span class="ai-section-badge">AI</span>
  294. </div>"""
  295. if result.core_trends:
  296. content = _format_list_content(result.core_trends)
  297. content_html = _escape_html(content).replace("\n", "<br>")
  298. ai_html += f"""
  299. <div class="ai-block">
  300. <div class="ai-block-title">核心热点态势</div>
  301. <div class="ai-block-content">{content_html}</div>
  302. </div>"""
  303. if result.sentiment_controversy:
  304. content = _format_list_content(result.sentiment_controversy)
  305. content_html = _escape_html(content).replace("\n", "<br>")
  306. ai_html += f"""
  307. <div class="ai-block">
  308. <div class="ai-block-title">舆论风向争议</div>
  309. <div class="ai-block-content">{content_html}</div>
  310. </div>"""
  311. if result.signals:
  312. content = _format_list_content(result.signals)
  313. content_html = _escape_html(content).replace("\n", "<br>")
  314. ai_html += f"""
  315. <div class="ai-block">
  316. <div class="ai-block-title">异动与弱信号</div>
  317. <div class="ai-block-content">{content_html}</div>
  318. </div>"""
  319. if result.rss_insights:
  320. content = _format_list_content(result.rss_insights)
  321. content_html = _escape_html(content).replace("\n", "<br>")
  322. ai_html += f"""
  323. <div class="ai-block">
  324. <div class="ai-block-title">RSS 深度洞察</div>
  325. <div class="ai-block-content">{content_html}</div>
  326. </div>"""
  327. if result.outlook_strategy:
  328. content = _format_list_content(result.outlook_strategy)
  329. content_html = _escape_html(content).replace("\n", "<br>")
  330. ai_html += f"""
  331. <div class="ai-block">
  332. <div class="ai-block-title">研判策略建议</div>
  333. <div class="ai-block-content">{content_html}</div>
  334. </div>"""
  335. if result.standalone_summaries:
  336. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  337. if summaries_text:
  338. summaries_html = _escape_html(summaries_text).replace("\n", "<br>")
  339. ai_html += f"""
  340. <div class="ai-block">
  341. <div class="ai-block-title">独立源点速览</div>
  342. <div class="ai-block-content">{summaries_html}</div>
  343. </div>"""
  344. ai_html += """
  345. </div>"""
  346. return ai_html