formatter.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. # coding=utf-8
  2. """
  3. AI 分析结果格式化模块
  4. 将 AI 分析结果格式化为各推送渠道的样式
  5. """
  6. import html as html_lib
  7. import re
  8. from .analyzer import AIAnalysisResult
  9. def _escape_html(text: str) -> str:
  10. """转义 HTML 特殊字符,防止 XSS 攻击"""
  11. return html_lib.escape(text) if text else ""
  12. def _format_list_content(text: str) -> str:
  13. """
  14. 格式化列表内容,确保序号前有换行
  15. 例如将 "1. xxx 2. yyy" 转换为:
  16. 1. xxx
  17. 2. yyy
  18. """
  19. if not text:
  20. return ""
  21. # 去除首尾空白,防止 AI 返回的内容开头就有换行导致显示空行
  22. text = text.strip()
  23. # 1. 规范化:确保 "1." 后面有空格
  24. result = re.sub(r'(\d+)\.([^ \d])', r'\1. \2', text)
  25. # 2. 强制换行:匹配 "数字.",且前面不是换行符
  26. result = re.sub(r'(?<=[^\n])\s+(\d+\.)', r'\n\1', result)
  27. # 3. 处理 "1.**粗体**" 这种情况(虽然 Prompt 要求不输出 Markdown,但防御性处理)
  28. result = re.sub(r'(?<=[^\n])(\d+\.\*\*)', r'\n\1', result)
  29. # 4. 处理中文标点后的换行
  30. result = re.sub(r'([::;,。;,])\s*(\d+\.)', r'\1\n\2', result)
  31. # 5. 处理 "XX方面:"、"XX领域:" 等子标题换行
  32. # 只有在中文标点(句号、逗号、分号等)后才触发换行,避免破坏 "1. XX领域:" 格式
  33. result = re.sub(r'([。!?;,、])\s*([a-zA-Z0-9\u4e00-\u9fa5]+(方面|领域)[::])', r'\1\n\2', result)
  34. # 6. 处理 "【XX】:"(如【宏观主线】:) 前的换行,确保视觉分隔
  35. result = re.sub(r'(?<=[^\n])\s*(【[^】]+】[::])', r'\n\n\1', result)
  36. # 7. 在列表项之间增加视觉空行(将 \n数字. 替换为 \n\n数字.)
  37. # 但排除标题行(以冒号结尾)之后的情况,避免标题和第一项之间有空行
  38. # (?<![::]) 是负向后瞻,表示前面不能是冒号
  39. result = re.sub(r'(?<![::])\n(\d+\.)', r'\n\n\1', result)
  40. return result
  def render_ai_analysis_markdown(result: AIAnalysisResult) -> str:
      """Render the analysis result as generic Markdown.

      A failed result is rendered as a single error line. Otherwise the
      output starts with a bold title and contains one bold-headed section
      per non-empty field. Every section except the last one
      (``outlook_strategy``) is followed by a blank line.
      """
      if not result.success:
          return f"⚠️ AI 分析失败: {result.error}"

      # (heading, raw text, whether a blank separator line follows)
      sections = (
          ("**核心热点态势**", result.core_trends, True),
          ("**舆论风向争议**", result.sentiment_controversy, True),
          ("**异动与弱信号**", result.signals, True),
          ("**RSS 深度洞察**", result.rss_insights, True),
          ("**研判策略建议**", result.outlook_strategy, False),
      )

      rendered = ["**✨ AI 热点分析**", ""]
      for heading, raw, blank_after in sections:
          if not raw:
              continue
          rendered.append(heading)
          rendered.append(_format_list_content(raw))
          if blank_after:
              rendered.append("")
      return "\n".join(rendered)
  def render_ai_analysis_feishu(result: AIAnalysisResult) -> str:
      """Render the analysis result as Markdown for a Feishu card.

      A failed result is rendered as a single error line. Otherwise each
      non-empty field becomes a bold heading followed by its formatted text.
      All sections but the final ``outlook_strategy`` one are followed by a
      blank line.
      """
      if not result.success:
          return f"⚠️ AI 分析失败: {result.error}"

      out = ["**✨ AI 热点分析**", ""]

      # Sections that are followed by a blank separator line.
      spaced_sections = (
          ("**核心热点态势**", result.core_trends),
          ("**舆论风向争议**", result.sentiment_controversy),
          ("**异动与弱信号**", result.signals),
          ("**RSS 深度洞察**", result.rss_insights),
      )
      for title, body in spaced_sections:
          if body:
              out += [title, _format_list_content(body), ""]

      # The closing section has no trailing blank line.
      closing = result.outlook_strategy
      if closing:
          out += ["**研判策略建议**", _format_list_content(closing)]

      return "\n".join(out)
  def render_ai_analysis_dingtalk(result: AIAnalysisResult) -> str:
      """Render the analysis result as DingTalk Markdown.

      Uses a ``###`` title and ``####`` section headings. A failed result is
      rendered as a single error line. Every non-empty field yields a heading
      plus its formatted text; all sections except ``outlook_strategy`` are
      followed by a blank line.
      """
      if not result.success:
          return f"⚠️ AI 分析失败: {result.error}"

      # (heading, raw text, whether a blank separator line follows)
      layout = [
          ("#### 核心热点态势", result.core_trends, True),
          ("#### 舆论风向争议", result.sentiment_controversy, True),
          ("#### 异动与弱信号", result.signals, True),
          ("#### RSS 深度洞察", result.rss_insights, True),
          ("#### 研判策略建议", result.outlook_strategy, False),
      ]

      chunks = ["### ✨ AI 热点分析", ""]
      for heading, raw, add_gap in layout:
          if raw:
              section = [heading, _format_list_content(raw)]
              if add_gap:
                  section.append("")
              chunks.extend(section)
      return "\n".join(chunks)
  def render_ai_analysis_html(result: AIAnalysisResult) -> str:
      """Render the analysis result as HTML (email).

      A failed result becomes a single ``ai-error`` div with the escaped
      error text. Otherwise each non-empty field is formatted, HTML-escaped,
      has its newlines converted to ``<br>``, and is wrapped in its own
      section div inside an ``ai-analysis`` container.
      """
      if not result.success:
          escaped_error = _escape_html(result.error)
          return f'<div class="ai-error">⚠️ AI 分析失败: {escaped_error}</div>'

      # (CSS class of the wrapper div, heading text, raw field text)
      sections = (
          ("ai-section", "核心热点态势", result.core_trends),
          ("ai-section", "舆论风向争议", result.sentiment_controversy),
          ("ai-section", "异动与弱信号", result.signals),
          ("ai-section", "RSS 深度洞察", result.rss_insights),
          ("ai-section ai-conclusion", "研判策略建议", result.outlook_strategy),
      )

      markup = ['<div class="ai-analysis">', "<h3>✨ AI 热点分析</h3>"]
      for css_class, heading, raw in sections:
          if not raw:
              continue
          # Escape after list formatting, then turn newlines into <br>.
          formatted = _format_list_content(raw)
          body = _escape_html(formatted).replace("\n", "<br>")
          markup.append(f'<div class="{css_class}">')
          markup.append(f"<h4>{heading}</h4>")
          markup.append(f'<div class="ai-content">{body}</div>')
          markup.append("</div>")
      markup.append("</div>")
      return "\n".join(markup)
  def render_ai_analysis_plain(result: AIAnalysisResult) -> str:
      """Render the analysis result as plain text.

      A failed result is rendered as a single error line. Otherwise each
      non-empty field is emitted under a bracketed heading; every section
      except the final ``outlook_strategy`` one is followed by a blank line.
      """
      if not result.success:
          return f"AI 分析失败: {result.error}"

      # (heading, raw text, whether a blank separator line follows)
      plan = (
          ("[核心热点态势]", result.core_trends, True),
          ("[舆论风向争议]", result.sentiment_controversy, True),
          ("[异动与弱信号]", result.signals, True),
          ("[RSS 深度洞察]", result.rss_insights, True),
          ("[研判策略建议]", result.outlook_strategy, False),
      )

      text_lines = ["【✨ AI 热点分析】", ""]
      for label, raw, gap in plan:
          if not raw:
              continue
          text_lines.extend([label, _format_list_content(raw)])
          if gap:
              text_lines.append("")
      return "\n".join(text_lines)
  def get_ai_analysis_renderer(channel: str):
      """Return the render function for a push channel.

      Args:
          channel: Channel key, e.g. ``"feishu"`` or ``"email"``.

      Returns:
          The renderer mapped to ``channel``; the generic Markdown renderer
          when the channel is not in the table.
      """
      fallback = render_ai_analysis_markdown

      # Channels that share the generic Markdown renderer.
      dispatch = dict.fromkeys(("wework", "telegram", "ntfy", "slack"), fallback)

      # Channels with a dedicated renderer. Email uses the rich HTML variant,
      # which matches the CSS of the HTML report.
      dispatch["feishu"] = render_ai_analysis_feishu
      dispatch["dingtalk"] = render_ai_analysis_dingtalk
      dispatch["email"] = render_ai_analysis_html_rich
      dispatch["bark"] = render_ai_analysis_plain

      return dispatch.get(channel, fallback)
  def render_ai_analysis_html_rich(result: AIAnalysisResult) -> str:
      """Render the analysis result as richly styled HTML (for the HTML report).

      Returns an empty string for a falsy ``result``. A failed result yields
      an ``ai-section`` wrapper containing an ``ai-error`` div; a missing
      error message falls back to "未知错误". Otherwise the output is an
      ``ai-section`` with a header and one ``ai-block`` per non-empty field,
      whose text is formatted, HTML-escaped and has newlines replaced by
      ``<br>``.
      """
      if not result:
          return ""

      if not result.success:
          reason = result.error or "未知错误"
          failure_rows = [
              '<div class="ai-section">',
              f'<div class="ai-error">⚠️ AI 分析失败: {_escape_html(str(reason))}</div>',
              "</div>",
          ]
          # Every row, including the first, is preceded by a newline.
          return "".join("\n" + row for row in failure_rows)

      rows = [
          '<div class="ai-section">',
          '<div class="ai-section-header">',
          '<div class="ai-section-title">✨ AI 热点分析</div>',
          '<span class="ai-section-badge">AI</span>',
          "</div>",
      ]

      blocks = (
          ("核心热点态势", result.core_trends),
          ("舆论风向争议", result.sentiment_controversy),
          ("异动与弱信号", result.signals),
          ("RSS 深度洞察", result.rss_insights),
          ("研判策略建议", result.outlook_strategy),
      )
      for title, raw in blocks:
          if not raw:
              continue
          formatted = _format_list_content(raw)
          body = _escape_html(formatted).replace("\n", "<br>")
          rows.extend(
              [
                  '<div class="ai-block">',
                  f'<div class="ai-block-title">{title}</div>',
                  f'<div class="ai-block-content">{body}</div>',
                  "</div>",
              ]
          )

      # Close the outer ai-section wrapper.
      rows.append("</div>")
      return "".join("\n" + row for row in rows)
  267. return ai_html