# formatter.py
  1. # coding=utf-8
  2. """
  3. AI 分析结果格式化模块
  4. 将 AI 分析结果格式化为各推送渠道的样式
  5. """
  6. import html as html_lib
  7. import re
  8. from .analyzer import AIAnalysisResult
  9. def _escape_html(text: str) -> str:
  10. """转义 HTML 特殊字符,防止 XSS 攻击"""
  11. return html_lib.escape(text) if text else ""
  12. def _format_list_content(text: str) -> str:
  13. """
  14. 格式化列表内容,确保序号前有换行
  15. 例如将 "1. xxx 2. yyy" 转换为:
  16. 1. xxx
  17. 2. yyy
  18. """
  19. if not text:
  20. return ""
  21. # 去除首尾空白,防止 AI 返回的内容开头就有换行导致显示空行
  22. text = text.strip()
  23. # 0. 合并序号与紧随的【标签】(防御性处理)
  24. # 将 "1.\n【投资者】:" 或 "1. 【投资者】:" 合并为 "1. 投资者:"
  25. text = re.sub(r'(\d+\.)\s*【([^】]+)】([::]?)', r'\1 \2:', text)
  26. # 1. 规范化:确保 "1." 后面有空格
  27. result = re.sub(r'(\d+)\.([^ \d])', r'\1. \2', text)
  28. # 2. 强制换行:匹配 "数字.",且前面不是换行符
  29. result = re.sub(r'(?<=[^\n])\s+(\d+\.)', r'\n\1', result)
  30. # 3. 处理 "1.**粗体**" 这种情况(虽然 Prompt 要求不输出 Markdown,但防御性处理)
  31. result = re.sub(r'(?<=[^\n])(\d+\.\*\*)', r'\n\1', result)
  32. # 4. 处理中文标点后的换行
  33. result = re.sub(r'([::;,。;,])\s*(\d+\.)', r'\1\n\2', result)
  34. # 5. 处理 "XX方面:"、"XX领域:" 等子标题换行
  35. # 只有在中文标点(句号、逗号、分号等)后才触发换行,避免破坏 "1. XX领域:" 格式
  36. result = re.sub(r'([。!?;,、])\s*([a-zA-Z0-9\u4e00-\u9fa5]+(方面|领域)[::])', r'\1\n\2', result)
  37. # 6. 处理 【标签】 格式
  38. # 6a. 标签前确保空行分隔(文本开头除外)
  39. result = re.sub(r'(?<=\S)\n*(【[^】]+】)', r'\n\n\1', result)
  40. # 6b. 合并标签与被换行拆开的冒号:【tag】\n: → 【tag】:
  41. result = re.sub(r'(【[^】]+】)\n+([::])', r'\1\2', result)
  42. # 6c. 标签后(含可选冒号),如果紧跟非空白非冒号内容则另起一行
  43. # 用 (?=[^\s::]) 避免正则回溯将冒号误判为"内容"而拆开 【tag】:
  44. result = re.sub(r'(【[^】]+】[::]?)[ \t]*(?=[^\s::])', r'\1\n', result)
  45. # 7. 在列表项之间增加视觉空行
  46. # 排除 【标签】 行(以】结尾)和子标题行(以冒号结尾)之后的情况,避免标题与首项之间出现空行
  47. result = re.sub(r'(?<![::】])\n(\d+\.)', r'\n\n\1', result)
  48. return result
  49. def _format_standalone_summaries(summaries: dict) -> str:
  50. """格式化独立展示区概括为纯文本行,每个源名称单独一行"""
  51. if not summaries:
  52. return ""
  53. lines = []
  54. for source_name, summary in summaries.items():
  55. if summary:
  56. lines.append(f"[{source_name}]:\n{summary}")
  57. return "\n\n".join(lines)
  58. def render_ai_analysis_markdown(result: AIAnalysisResult) -> str:
  59. """渲染为通用 Markdown 格式(Telegram、企业微信、ntfy、Bark、Slack)"""
  60. if not result.success:
  61. return f"⚠️ AI 分析失败: {result.error}"
  62. lines = ["**✨ AI 热点分析**", ""]
  63. if result.core_trends:
  64. lines.extend(["**核心热点态势**", _format_list_content(result.core_trends), ""])
  65. if result.sentiment_controversy:
  66. lines.extend(
  67. ["**舆论风向争议**", _format_list_content(result.sentiment_controversy), ""]
  68. )
  69. if result.signals:
  70. lines.extend(["**异动与弱信号**", _format_list_content(result.signals), ""])
  71. if result.rss_insights:
  72. lines.extend(
  73. ["**RSS 深度洞察**", _format_list_content(result.rss_insights), ""]
  74. )
  75. if result.outlook_strategy:
  76. lines.extend(
  77. ["**研判策略建议**", _format_list_content(result.outlook_strategy), ""]
  78. )
  79. if result.standalone_summaries:
  80. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  81. if summaries_text:
  82. lines.extend(["**独立源点速览**", summaries_text])
  83. return "\n".join(lines)
  84. def render_ai_analysis_feishu(result: AIAnalysisResult) -> str:
  85. """渲染为飞书卡片 Markdown 格式"""
  86. if not result.success:
  87. return f"⚠️ AI 分析失败: {result.error}"
  88. lines = ["**✨ AI 热点分析**", ""]
  89. if result.core_trends:
  90. lines.extend(["**核心热点态势**", _format_list_content(result.core_trends), ""])
  91. if result.sentiment_controversy:
  92. lines.extend(
  93. ["**舆论风向争议**", _format_list_content(result.sentiment_controversy), ""]
  94. )
  95. if result.signals:
  96. lines.extend(["**异动与弱信号**", _format_list_content(result.signals), ""])
  97. if result.rss_insights:
  98. lines.extend(
  99. ["**RSS 深度洞察**", _format_list_content(result.rss_insights), ""]
  100. )
  101. if result.outlook_strategy:
  102. lines.extend(
  103. ["**研判策略建议**", _format_list_content(result.outlook_strategy), ""]
  104. )
  105. if result.standalone_summaries:
  106. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  107. if summaries_text:
  108. lines.extend(["**独立源点速览**", summaries_text])
  109. return "\n".join(lines)
  110. def render_ai_analysis_dingtalk(result: AIAnalysisResult) -> str:
  111. """渲染为钉钉 Markdown 格式"""
  112. if not result.success:
  113. return f"⚠️ AI 分析失败: {result.error}"
  114. lines = ["### ✨ AI 热点分析", ""]
  115. if result.core_trends:
  116. lines.extend(
  117. ["#### 核心热点态势", _format_list_content(result.core_trends), ""]
  118. )
  119. if result.sentiment_controversy:
  120. lines.extend(
  121. [
  122. "#### 舆论风向争议",
  123. _format_list_content(result.sentiment_controversy),
  124. "",
  125. ]
  126. )
  127. if result.signals:
  128. lines.extend(["#### 异动与弱信号", _format_list_content(result.signals), ""])
  129. if result.rss_insights:
  130. lines.extend(
  131. ["#### RSS 深度洞察", _format_list_content(result.rss_insights), ""]
  132. )
  133. if result.outlook_strategy:
  134. lines.extend(
  135. ["#### 研判策略建议", _format_list_content(result.outlook_strategy), ""]
  136. )
  137. if result.standalone_summaries:
  138. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  139. if summaries_text:
  140. lines.extend(["#### 独立源点速览", summaries_text])
  141. return "\n".join(lines)
  142. def render_ai_analysis_html(result: AIAnalysisResult) -> str:
  143. """渲染为 HTML 格式(邮件)"""
  144. if not result.success:
  145. return (
  146. f'<div class="ai-error">⚠️ AI 分析失败: {_escape_html(result.error)}</div>'
  147. )
  148. html_parts = ['<div class="ai-analysis">', "<h3>✨ AI 热点分析</h3>"]
  149. if result.core_trends:
  150. content = _format_list_content(result.core_trends)
  151. content_html = _escape_html(content).replace("\n", "<br>")
  152. html_parts.extend(
  153. [
  154. '<div class="ai-section">',
  155. "<h4>核心热点态势</h4>",
  156. f'<div class="ai-content">{content_html}</div>',
  157. "</div>",
  158. ]
  159. )
  160. if result.sentiment_controversy:
  161. content = _format_list_content(result.sentiment_controversy)
  162. content_html = _escape_html(content).replace("\n", "<br>")
  163. html_parts.extend(
  164. [
  165. '<div class="ai-section">',
  166. "<h4>舆论风向争议</h4>",
  167. f'<div class="ai-content">{content_html}</div>',
  168. "</div>",
  169. ]
  170. )
  171. if result.signals:
  172. content = _format_list_content(result.signals)
  173. content_html = _escape_html(content).replace("\n", "<br>")
  174. html_parts.extend(
  175. [
  176. '<div class="ai-section">',
  177. "<h4>异动与弱信号</h4>",
  178. f'<div class="ai-content">{content_html}</div>',
  179. "</div>",
  180. ]
  181. )
  182. if result.rss_insights:
  183. content = _format_list_content(result.rss_insights)
  184. content_html = _escape_html(content).replace("\n", "<br>")
  185. html_parts.extend(
  186. [
  187. '<div class="ai-section">',
  188. "<h4>RSS 深度洞察</h4>",
  189. f'<div class="ai-content">{content_html}</div>',
  190. "</div>",
  191. ]
  192. )
  193. if result.outlook_strategy:
  194. content = _format_list_content(result.outlook_strategy)
  195. content_html = _escape_html(content).replace("\n", "<br>")
  196. html_parts.extend(
  197. [
  198. '<div class="ai-section ai-conclusion">',
  199. "<h4>研判策略建议</h4>",
  200. f'<div class="ai-content">{content_html}</div>',
  201. "</div>",
  202. ]
  203. )
  204. if result.standalone_summaries:
  205. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  206. if summaries_text:
  207. summaries_html = _escape_html(summaries_text).replace("\n", "<br>")
  208. html_parts.extend(
  209. [
  210. '<div class="ai-section">',
  211. "<h4>独立源点速览</h4>",
  212. f'<div class="ai-content">{summaries_html}</div>',
  213. "</div>",
  214. ]
  215. )
  216. html_parts.append("</div>")
  217. return "\n".join(html_parts)
  218. def render_ai_analysis_plain(result: AIAnalysisResult) -> str:
  219. """渲染为纯文本格式"""
  220. if not result.success:
  221. return f"AI 分析失败: {result.error}"
  222. lines = ["【✨ AI 热点分析】", ""]
  223. if result.core_trends:
  224. lines.extend(["[核心热点态势]", _format_list_content(result.core_trends), ""])
  225. if result.sentiment_controversy:
  226. lines.extend(
  227. ["[舆论风向争议]", _format_list_content(result.sentiment_controversy), ""]
  228. )
  229. if result.signals:
  230. lines.extend(["[异动与弱信号]", _format_list_content(result.signals), ""])
  231. if result.rss_insights:
  232. lines.extend(["[RSS 深度洞察]", _format_list_content(result.rss_insights), ""])
  233. if result.outlook_strategy:
  234. lines.extend(["[研判策略建议]", _format_list_content(result.outlook_strategy), ""])
  235. if result.standalone_summaries:
  236. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  237. if summaries_text:
  238. lines.extend(["[独立源点速览]", summaries_text])
  239. return "\n".join(lines)
  240. def get_ai_analysis_renderer(channel: str):
  241. """根据渠道获取对应的渲染函数"""
  242. renderers = {
  243. "feishu": render_ai_analysis_feishu,
  244. "dingtalk": render_ai_analysis_dingtalk,
  245. "wework": render_ai_analysis_markdown,
  246. "telegram": render_ai_analysis_markdown,
  247. "email": render_ai_analysis_html_rich, # 邮件使用丰富样式,配合 HTML 报告的 CSS
  248. "ntfy": render_ai_analysis_markdown,
  249. "bark": render_ai_analysis_plain,
  250. "slack": render_ai_analysis_markdown,
  251. }
  252. return renderers.get(channel, render_ai_analysis_markdown)
  253. def render_ai_analysis_html_rich(result: AIAnalysisResult) -> str:
  254. """渲染为丰富样式的 HTML 格式(HTML 报告用)"""
  255. if not result:
  256. return ""
  257. # 检查是否成功
  258. if not result.success:
  259. error_msg = result.error or "未知错误"
  260. return f"""
  261. <div class="ai-section">
  262. <div class="ai-error">⚠️ AI 分析失败: {_escape_html(str(error_msg))}</div>
  263. </div>"""
  264. ai_html = """
  265. <div class="ai-section">
  266. <div class="ai-section-header">
  267. <div class="ai-section-title">✨ AI 热点分析</div>
  268. <span class="ai-section-badge">AI</span>
  269. </div>"""
  270. if result.core_trends:
  271. content = _format_list_content(result.core_trends)
  272. content_html = _escape_html(content).replace("\n", "<br>")
  273. ai_html += f"""
  274. <div class="ai-block">
  275. <div class="ai-block-title">核心热点态势</div>
  276. <div class="ai-block-content">{content_html}</div>
  277. </div>"""
  278. if result.sentiment_controversy:
  279. content = _format_list_content(result.sentiment_controversy)
  280. content_html = _escape_html(content).replace("\n", "<br>")
  281. ai_html += f"""
  282. <div class="ai-block">
  283. <div class="ai-block-title">舆论风向争议</div>
  284. <div class="ai-block-content">{content_html}</div>
  285. </div>"""
  286. if result.signals:
  287. content = _format_list_content(result.signals)
  288. content_html = _escape_html(content).replace("\n", "<br>")
  289. ai_html += f"""
  290. <div class="ai-block">
  291. <div class="ai-block-title">异动与弱信号</div>
  292. <div class="ai-block-content">{content_html}</div>
  293. </div>"""
  294. if result.rss_insights:
  295. content = _format_list_content(result.rss_insights)
  296. content_html = _escape_html(content).replace("\n", "<br>")
  297. ai_html += f"""
  298. <div class="ai-block">
  299. <div class="ai-block-title">RSS 深度洞察</div>
  300. <div class="ai-block-content">{content_html}</div>
  301. </div>"""
  302. if result.outlook_strategy:
  303. content = _format_list_content(result.outlook_strategy)
  304. content_html = _escape_html(content).replace("\n", "<br>")
  305. ai_html += f"""
  306. <div class="ai-block">
  307. <div class="ai-block-title">研判策略建议</div>
  308. <div class="ai-block-content">{content_html}</div>
  309. </div>"""
  310. if result.standalone_summaries:
  311. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  312. if summaries_text:
  313. summaries_html = _escape_html(summaries_text).replace("\n", "<br>")
  314. ai_html += f"""
  315. <div class="ai-block">
  316. <div class="ai-block-title">独立源点速览</div>
  317. <div class="ai-block-content">{summaries_html}</div>
  318. </div>"""
  319. ai_html += """
  320. </div>"""
  321. return ai_html