formatter.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  1. # coding=utf-8
  2. """
  3. AI 分析结果格式化模块
  4. 将 AI 分析结果格式化为各推送渠道的样式
  5. """
  6. import html as html_lib
  7. import re
  8. from .analyzer import AIAnalysisResult
  9. def _escape_html(text: str) -> str:
  10. """转义 HTML 特殊字符,防止 XSS 攻击"""
  11. return html_lib.escape(text) if text else ""
  12. def _format_list_content(text: str) -> str:
  13. """
  14. 格式化列表内容,确保序号前有换行
  15. 例如将 "1. xxx 2. yyy" 转换为:
  16. 1. xxx
  17. 2. yyy
  18. """
  19. if not text:
  20. return ""
  21. # 去除首尾空白,防止 AI 返回的内容开头就有换行导致显示空行
  22. text = text.strip()
  23. # 0. 合并序号与紧随的【标签】(防御性处理)
  24. # 将 "1.\n【投资者】:" 或 "1. 【投资者】:" 合并为 "1. 投资者:"
  25. text = re.sub(r'(\d+\.)\s*【([^】]+)】([::]?)', r'\1 \2:', text)
  26. # 1. 规范化:确保 "1." 后面有空格
  27. result = re.sub(r'(\d+)\.([^ \d])', r'\1. \2', text)
  28. # 2. 强制换行:匹配 "数字.",且前面不是换行符
  29. # (?!\d) 排除版本号/小数(如 2.0、3.5),避免将其误判为列表序号
  30. result = re.sub(r'(?<=[^\n])\s+(\d+\.)(?!\d)', r'\n\1', result)
  31. # 3. 处理 "1.**粗体**" 这种情况(虽然 Prompt 要求不输出 Markdown,但防御性处理)
  32. result = re.sub(r'(?<=[^\n])(\d+\.\*\*)', r'\n\1', result)
  33. # 4. 处理中文标点后的换行(排除版本号/小数)
  34. result = re.sub(r'([::;,。;,])\s*(\d+\.)(?!\d)', r'\1\n\2', result)
  35. # 5. 处理 "XX方面:"、"XX领域:" 等子标题换行
  36. # 只有在中文标点(句号、逗号、分号等)后才触发换行,避免破坏 "1. XX领域:" 格式
  37. result = re.sub(r'([。!?;,、])\s*([a-zA-Z0-9\u4e00-\u9fa5]+(方面|领域)[::])', r'\1\n\2', result)
  38. # 6. 处理 【标签】 格式
  39. # 6a. 标签前确保空行分隔(文本开头除外)
  40. result = re.sub(r'(?<=\S)\n*(【[^】]+】)', r'\n\n\1', result)
  41. # 6b. 合并标签与被换行拆开的冒号:【tag】\n: → 【tag】:
  42. result = re.sub(r'(【[^】]+】)\n+([::])', r'\1\2', result)
  43. # 6c. 标签后(含可选冒号),如果紧跟非空白非冒号内容则另起一行
  44. # 用 (?=[^\s::]) 避免正则回溯将冒号误判为"内容"而拆开 【tag】:
  45. result = re.sub(r'(【[^】]+】[::]?)[ \t]*(?=[^\s::])', r'\1\n', result)
  46. # 7. 在列表项之间增加视觉空行(排除版本号/小数)
  47. # 排除 【标签】 行(以】结尾)和子标题行(以冒号结尾)之后的情况,避免标题与首项之间出现空行
  48. result = re.sub(r'(?<![::】])\n(\d+\.)(?!\d)', r'\n\n\1', result)
  49. return result
  50. def _format_standalone_summaries(summaries: dict) -> str:
  51. """格式化独立展示区概括为纯文本行,每个源名称单独一行"""
  52. if not summaries:
  53. return ""
  54. lines = []
  55. for source_name, summary in summaries.items():
  56. if summary:
  57. lines.append(f"[{source_name}]:\n{summary}")
  58. return "\n\n".join(lines)
  59. def render_ai_analysis_markdown(result: AIAnalysisResult) -> str:
  60. """渲染为通用 Markdown 格式(Telegram、企业微信、ntfy、Bark、Slack)"""
  61. if not result.success:
  62. if result.skipped:
  63. return f"ℹ️ {result.error}"
  64. return f"⚠️ AI 分析失败: {result.error}"
  65. lines = ["**✨ AI 热点分析**", ""]
  66. if result.core_trends:
  67. lines.extend(["**核心热点态势**", _format_list_content(result.core_trends), ""])
  68. if result.sentiment_controversy:
  69. lines.extend(
  70. ["**舆论风向争议**", _format_list_content(result.sentiment_controversy), ""]
  71. )
  72. if result.signals:
  73. lines.extend(["**异动与弱信号**", _format_list_content(result.signals), ""])
  74. if result.rss_insights:
  75. lines.extend(
  76. ["**RSS 深度洞察**", _format_list_content(result.rss_insights), ""]
  77. )
  78. if result.outlook_strategy:
  79. lines.extend(
  80. ["**研判策略建议**", _format_list_content(result.outlook_strategy), ""]
  81. )
  82. if result.standalone_summaries:
  83. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  84. if summaries_text:
  85. lines.extend(["**独立源点速览**", summaries_text])
  86. return "\n".join(lines)
  87. def render_ai_analysis_feishu(result: AIAnalysisResult) -> str:
  88. """渲染为飞书卡片 Markdown 格式"""
  89. if not result.success:
  90. if result.skipped:
  91. return f"ℹ️ {result.error}"
  92. return f"⚠️ AI 分析失败: {result.error}"
  93. lines = ["**✨ AI 热点分析**", ""]
  94. if result.core_trends:
  95. lines.extend(["**核心热点态势**", _format_list_content(result.core_trends), ""])
  96. if result.sentiment_controversy:
  97. lines.extend(
  98. ["**舆论风向争议**", _format_list_content(result.sentiment_controversy), ""]
  99. )
  100. if result.signals:
  101. lines.extend(["**异动与弱信号**", _format_list_content(result.signals), ""])
  102. if result.rss_insights:
  103. lines.extend(
  104. ["**RSS 深度洞察**", _format_list_content(result.rss_insights), ""]
  105. )
  106. if result.outlook_strategy:
  107. lines.extend(
  108. ["**研判策略建议**", _format_list_content(result.outlook_strategy), ""]
  109. )
  110. if result.standalone_summaries:
  111. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  112. if summaries_text:
  113. lines.extend(["**独立源点速览**", summaries_text])
  114. return "\n".join(lines)
  115. def render_ai_analysis_dingtalk(result: AIAnalysisResult) -> str:
  116. """渲染为钉钉 Markdown 格式"""
  117. if not result.success:
  118. if result.skipped:
  119. return f"ℹ️ {result.error}"
  120. return f"⚠️ AI 分析失败: {result.error}"
  121. lines = ["### ✨ AI 热点分析", ""]
  122. if result.core_trends:
  123. lines.extend(
  124. ["#### 核心热点态势", _format_list_content(result.core_trends), ""]
  125. )
  126. if result.sentiment_controversy:
  127. lines.extend(
  128. [
  129. "#### 舆论风向争议",
  130. _format_list_content(result.sentiment_controversy),
  131. "",
  132. ]
  133. )
  134. if result.signals:
  135. lines.extend(["#### 异动与弱信号", _format_list_content(result.signals), ""])
  136. if result.rss_insights:
  137. lines.extend(
  138. ["#### RSS 深度洞察", _format_list_content(result.rss_insights), ""]
  139. )
  140. if result.outlook_strategy:
  141. lines.extend(
  142. ["#### 研判策略建议", _format_list_content(result.outlook_strategy), ""]
  143. )
  144. if result.standalone_summaries:
  145. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  146. if summaries_text:
  147. lines.extend(["#### 独立源点速览", summaries_text])
  148. return "\n".join(lines)
  149. def render_ai_analysis_html(result: AIAnalysisResult) -> str:
  150. """渲染为 HTML 格式(邮件)"""
  151. if not result.success:
  152. if result.skipped:
  153. return f'<div class="ai-info">ℹ️ {_escape_html(result.error)}</div>'
  154. return (
  155. f'<div class="ai-error">⚠️ AI 分析失败: {_escape_html(result.error)}</div>'
  156. )
  157. html_parts = ['<div class="ai-analysis">', "<h3>✨ AI 热点分析</h3>"]
  158. if result.core_trends:
  159. content = _format_list_content(result.core_trends)
  160. content_html = _escape_html(content).replace("\n", "<br>")
  161. html_parts.extend(
  162. [
  163. '<div class="ai-section">',
  164. "<h4>核心热点态势</h4>",
  165. f'<div class="ai-content">{content_html}</div>',
  166. "</div>",
  167. ]
  168. )
  169. if result.sentiment_controversy:
  170. content = _format_list_content(result.sentiment_controversy)
  171. content_html = _escape_html(content).replace("\n", "<br>")
  172. html_parts.extend(
  173. [
  174. '<div class="ai-section">',
  175. "<h4>舆论风向争议</h4>",
  176. f'<div class="ai-content">{content_html}</div>',
  177. "</div>",
  178. ]
  179. )
  180. if result.signals:
  181. content = _format_list_content(result.signals)
  182. content_html = _escape_html(content).replace("\n", "<br>")
  183. html_parts.extend(
  184. [
  185. '<div class="ai-section">',
  186. "<h4>异动与弱信号</h4>",
  187. f'<div class="ai-content">{content_html}</div>',
  188. "</div>",
  189. ]
  190. )
  191. if result.rss_insights:
  192. content = _format_list_content(result.rss_insights)
  193. content_html = _escape_html(content).replace("\n", "<br>")
  194. html_parts.extend(
  195. [
  196. '<div class="ai-section">',
  197. "<h4>RSS 深度洞察</h4>",
  198. f'<div class="ai-content">{content_html}</div>',
  199. "</div>",
  200. ]
  201. )
  202. if result.outlook_strategy:
  203. content = _format_list_content(result.outlook_strategy)
  204. content_html = _escape_html(content).replace("\n", "<br>")
  205. html_parts.extend(
  206. [
  207. '<div class="ai-section ai-conclusion">',
  208. "<h4>研判策略建议</h4>",
  209. f'<div class="ai-content">{content_html}</div>',
  210. "</div>",
  211. ]
  212. )
  213. if result.standalone_summaries:
  214. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  215. if summaries_text:
  216. summaries_html = _escape_html(summaries_text).replace("\n", "<br>")
  217. html_parts.extend(
  218. [
  219. '<div class="ai-section">',
  220. "<h4>独立源点速览</h4>",
  221. f'<div class="ai-content">{summaries_html}</div>',
  222. "</div>",
  223. ]
  224. )
  225. html_parts.append("</div>")
  226. return "\n".join(html_parts)
  227. def render_ai_analysis_plain(result: AIAnalysisResult) -> str:
  228. """渲染为纯文本格式"""
  229. if not result.success:
  230. if result.skipped:
  231. return result.error
  232. return f"AI 分析失败: {result.error}"
  233. lines = ["【✨ AI 热点分析】", ""]
  234. if result.core_trends:
  235. lines.extend(["[核心热点态势]", _format_list_content(result.core_trends), ""])
  236. if result.sentiment_controversy:
  237. lines.extend(
  238. ["[舆论风向争议]", _format_list_content(result.sentiment_controversy), ""]
  239. )
  240. if result.signals:
  241. lines.extend(["[异动与弱信号]", _format_list_content(result.signals), ""])
  242. if result.rss_insights:
  243. lines.extend(["[RSS 深度洞察]", _format_list_content(result.rss_insights), ""])
  244. if result.outlook_strategy:
  245. lines.extend(["[研判策略建议]", _format_list_content(result.outlook_strategy), ""])
  246. if result.standalone_summaries:
  247. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  248. if summaries_text:
  249. lines.extend(["[独立源点速览]", summaries_text])
  250. return "\n".join(lines)
  251. def render_ai_analysis_telegram(result: AIAnalysisResult) -> str:
  252. """渲染为 Telegram HTML 格式(配合 parse_mode: HTML)
  253. Telegram Bot API 的 HTML 模式仅支持有限标签:
  254. <b>, <i>, <u>, <s>, <code>, <pre>, <a href="">, <blockquote>
  255. 换行直接使用 \\n,不支持 <br>, <div>, <h1>-<h6> 等标签。
  256. """
  257. if not result.success:
  258. if result.skipped:
  259. return f"ℹ️ {_escape_html(result.error)}"
  260. return f"⚠️ AI 分析失败: {_escape_html(result.error)}"
  261. lines = ["<b>✨ AI 热点分析</b>", ""]
  262. if result.core_trends:
  263. lines.extend(["<b>核心热点态势</b>", _escape_html(_format_list_content(result.core_trends)), ""])
  264. if result.sentiment_controversy:
  265. lines.extend(["<b>舆论风向争议</b>", _escape_html(_format_list_content(result.sentiment_controversy)), ""])
  266. if result.signals:
  267. lines.extend(["<b>异动与弱信号</b>", _escape_html(_format_list_content(result.signals)), ""])
  268. if result.rss_insights:
  269. lines.extend(["<b>RSS 深度洞察</b>", _escape_html(_format_list_content(result.rss_insights)), ""])
  270. if result.outlook_strategy:
  271. lines.extend(["<b>研判策略建议</b>", _escape_html(_format_list_content(result.outlook_strategy)), ""])
  272. if result.standalone_summaries:
  273. summaries_text = _format_standalone_summaries(result.standalone_summaries)
  274. if summaries_text:
  275. lines.extend(["<b>独立源点速览</b>", _escape_html(summaries_text)])
  276. return "\n".join(lines)
  277. def get_ai_analysis_renderer(channel: str):
  278. """根据渠道获取对应的渲染函数"""
  279. renderers = {
  280. "feishu": render_ai_analysis_feishu,
  281. "dingtalk": render_ai_analysis_dingtalk,
  282. "wework": render_ai_analysis_markdown,
  283. "telegram": render_ai_analysis_telegram,
  284. "email": render_ai_analysis_html_rich, # 邮件使用丰富样式,配合 HTML 报告的 CSS
  285. "ntfy": render_ai_analysis_markdown,
  286. "bark": render_ai_analysis_plain,
  287. "slack": render_ai_analysis_markdown,
  288. }
  289. return renderers.get(channel, render_ai_analysis_markdown)
def render_ai_analysis_html_rich(result: AIAnalysisResult) -> str:
    """Render an analysis result as richly styled HTML (for the HTML report).

    Returns:
        ``""`` when ``result`` is falsy. For an unsuccessful result, an
        ``ai-section`` wrapper containing either an ``ai-info`` notice
        (skipped) or an ``ai-error`` notice (failed; a placeholder message is
        used when ``result.error`` is empty). For a successful result, an
        ``ai-section`` with a header and an ``ai-blocks-grid`` holding one
        ``ai-block`` per non-empty section, plus the standalone-source
        summaries when present.

    All dynamic text is HTML-escaped, and newlines in section bodies are
    converted to ``<br>``.
    """
    if not result:
        return ""

    # Check whether the analysis succeeded.
    if not result.success:
        if result.skipped:
            # str() guards against a non-string error value before escaping.
            return f"""
                <div class="ai-section">
                    <div class="ai-info">ℹ️ {_escape_html(str(result.error))}</div>
                </div>"""
        error_msg = result.error or "未知错误"
        return f"""
                <div class="ai-section">
                    <div class="ai-error">⚠️ AI 分析失败: {_escape_html(str(error_msg))}</div>
                </div>"""

    # Opening markup: section wrapper, header with badge, and the blocks grid.
    ai_html = """
                <div class="ai-section">
                    <div class="ai-section-header">
                        <div class="ai-section-title">✨ AI 热点分析</div>
                        <span class="ai-section-badge">AI</span>
                    </div>
                    <div class="ai-blocks-grid">"""

    # Each non-empty section becomes one block: format the list text, escape
    # it, then turn newlines into <br> (escaping first keeps the <br> intact).
    if result.core_trends:
        content = _format_list_content(result.core_trends)
        content_html = _escape_html(content).replace("\n", "<br>")
        ai_html += f"""
                        <div class="ai-block">
                            <div class="ai-block-title">核心热点态势</div>
                            <div class="ai-block-content">{content_html}</div>
                        </div>"""

    if result.sentiment_controversy:
        content = _format_list_content(result.sentiment_controversy)
        content_html = _escape_html(content).replace("\n", "<br>")
        ai_html += f"""
                        <div class="ai-block">
                            <div class="ai-block-title">舆论风向争议</div>
                            <div class="ai-block-content">{content_html}</div>
                        </div>"""

    if result.signals:
        content = _format_list_content(result.signals)
        content_html = _escape_html(content).replace("\n", "<br>")
        ai_html += f"""
                        <div class="ai-block">
                            <div class="ai-block-title">异动与弱信号</div>
                            <div class="ai-block-content">{content_html}</div>
                        </div>"""

    if result.rss_insights:
        content = _format_list_content(result.rss_insights)
        content_html = _escape_html(content).replace("\n", "<br>")
        ai_html += f"""
                        <div class="ai-block">
                            <div class="ai-block-title">RSS 深度洞察</div>
                            <div class="ai-block-content">{content_html}</div>
                        </div>"""

    if result.outlook_strategy:
        content = _format_list_content(result.outlook_strategy)
        content_html = _escape_html(content).replace("\n", "<br>")
        ai_html += f"""
                        <div class="ai-block">
                            <div class="ai-block-title">研判策略建议</div>
                            <div class="ai-block-content">{content_html}</div>
                        </div>"""

    # Standalone-source summaries are already plain text, so they are only
    # escaped and <br>-converted, not passed through the list formatter.
    if result.standalone_summaries:
        summaries_text = _format_standalone_summaries(result.standalone_summaries)
        if summaries_text:
            summaries_html = _escape_html(summaries_text).replace("\n", "<br>")
            ai_html += f"""
                        <div class="ai-block">
                            <div class="ai-block-title">独立源点速览</div>
                            <div class="ai-block-content">{summaries_html}</div>
                        </div>"""

    # Close the blocks grid and the section wrapper.
    ai_html += """
                    </div>
                </div>"""
    return ai_html