translator.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. # coding=utf-8
  2. """
  3. AI 翻译器模块
  4. 对推送内容进行多语言翻译
  5. 基于 LiteLLM 统一接口,支持 100+ AI 提供商
  6. """
  7. import json
  8. from dataclasses import dataclass, field
  9. from pathlib import Path
  10. from typing import Any, Dict, List, Optional
  11. from trendradar.ai.client import AIClient
  12. @dataclass
  13. class TranslationResult:
  14. """翻译结果"""
  15. translated_text: str = "" # 翻译后的文本
  16. original_text: str = "" # 原始文本
  17. success: bool = False # 是否成功
  18. error: str = "" # 错误信息
  19. @dataclass
  20. class BatchTranslationResult:
  21. """批量翻译结果"""
  22. results: List[TranslationResult] = field(default_factory=list)
  23. success_count: int = 0
  24. fail_count: int = 0
  25. total_count: int = 0
  26. class AITranslator:
  27. """AI 翻译器"""
  28. def __init__(self, translation_config: Dict[str, Any], ai_config: Dict[str, Any]):
  29. """
  30. 初始化 AI 翻译器
  31. Args:
  32. translation_config: AI 翻译配置 (AI_TRANSLATION)
  33. ai_config: AI 模型配置(LiteLLM 格式)
  34. """
  35. self.translation_config = translation_config
  36. self.ai_config = ai_config
  37. # 翻译配置
  38. self.enabled = translation_config.get("ENABLED", False)
  39. self.target_language = translation_config.get("LANGUAGE", "English")
  40. # 创建 AI 客户端(基于 LiteLLM)
  41. self.client = AIClient(ai_config)
  42. # 加载提示词模板
  43. self.system_prompt, self.user_prompt_template = self._load_prompt_template(
  44. translation_config.get("PROMPT_FILE", "ai_translation_prompt.txt")
  45. )
  46. def _load_prompt_template(self, prompt_file: str) -> tuple:
  47. """加载提示词模板"""
  48. config_dir = Path(__file__).parent.parent.parent / "config"
  49. prompt_path = config_dir / prompt_file
  50. if not prompt_path.exists():
  51. print(f"[翻译] 提示词文件不存在: {prompt_path}")
  52. return "", ""
  53. content = prompt_path.read_text(encoding="utf-8")
  54. # 解析 [system] 和 [user] 部分
  55. system_prompt = ""
  56. user_prompt = ""
  57. if "[system]" in content and "[user]" in content:
  58. parts = content.split("[user]")
  59. system_part = parts[0]
  60. user_part = parts[1] if len(parts) > 1 else ""
  61. if "[system]" in system_part:
  62. system_prompt = system_part.split("[system]")[1].strip()
  63. user_prompt = user_part.strip()
  64. else:
  65. user_prompt = content
  66. return system_prompt, user_prompt
  67. def translate(self, text: str) -> TranslationResult:
  68. """
  69. 翻译单条文本
  70. Args:
  71. text: 要翻译的文本
  72. Returns:
  73. TranslationResult: 翻译结果
  74. """
  75. result = TranslationResult(original_text=text)
  76. if not self.enabled:
  77. result.error = "翻译功能未启用"
  78. return result
  79. if not self.client.api_key:
  80. result.error = "未配置 AI API Key"
  81. return result
  82. if not text or not text.strip():
  83. result.translated_text = text
  84. result.success = True
  85. return result
  86. try:
  87. # 构建提示词
  88. user_prompt = self.user_prompt_template
  89. user_prompt = user_prompt.replace("{target_language}", self.target_language)
  90. user_prompt = user_prompt.replace("{content}", text)
  91. # 调用 AI API
  92. response = self._call_ai(user_prompt)
  93. result.translated_text = response.strip()
  94. result.success = True
  95. except Exception as e:
  96. error_type = type(e).__name__
  97. error_msg = str(e)
  98. if len(error_msg) > 100:
  99. error_msg = error_msg[:100] + "..."
  100. result.error = f"翻译失败 ({error_type}): {error_msg}"
  101. return result
  102. def translate_batch(self, texts: List[str]) -> BatchTranslationResult:
  103. """
  104. 批量翻译文本(单次 API 调用)
  105. Args:
  106. texts: 要翻译的文本列表
  107. Returns:
  108. BatchTranslationResult: 批量翻译结果
  109. """
  110. batch_result = BatchTranslationResult(total_count=len(texts))
  111. if not self.enabled:
  112. for text in texts:
  113. batch_result.results.append(TranslationResult(
  114. original_text=text,
  115. error="翻译功能未启用"
  116. ))
  117. batch_result.fail_count = len(texts)
  118. return batch_result
  119. if not self.client.api_key:
  120. for text in texts:
  121. batch_result.results.append(TranslationResult(
  122. original_text=text,
  123. error="未配置 AI API Key"
  124. ))
  125. batch_result.fail_count = len(texts)
  126. return batch_result
  127. if not texts:
  128. return batch_result
  129. # 过滤空文本
  130. non_empty_indices = []
  131. non_empty_texts = []
  132. for i, text in enumerate(texts):
  133. if text and text.strip():
  134. non_empty_indices.append(i)
  135. non_empty_texts.append(text)
  136. # 初始化结果列表
  137. for text in texts:
  138. batch_result.results.append(TranslationResult(original_text=text))
  139. # 空文本直接标记成功
  140. for i, text in enumerate(texts):
  141. if not text or not text.strip():
  142. batch_result.results[i].translated_text = text
  143. batch_result.results[i].success = True
  144. batch_result.success_count += 1
  145. if not non_empty_texts:
  146. return batch_result
  147. try:
  148. # 构建批量翻译内容(使用编号格式)
  149. batch_content = self._format_batch_content(non_empty_texts)
  150. # 构建提示词
  151. user_prompt = self.user_prompt_template
  152. user_prompt = user_prompt.replace("{target_language}", self.target_language)
  153. user_prompt = user_prompt.replace("{content}", batch_content)
  154. # 调用 AI API
  155. response = self._call_ai(user_prompt)
  156. # 解析批量翻译结果
  157. translated_texts = self._parse_batch_response(response, len(non_empty_texts))
  158. # 填充结果
  159. for idx, translated in zip(non_empty_indices, translated_texts):
  160. batch_result.results[idx].translated_text = translated
  161. batch_result.results[idx].success = True
  162. batch_result.success_count += 1
  163. except Exception as e:
  164. error_msg = f"批量翻译失败: {type(e).__name__}: {str(e)[:100]}"
  165. for idx in non_empty_indices:
  166. batch_result.results[idx].error = error_msg
  167. batch_result.fail_count = len(non_empty_indices)
  168. return batch_result
  169. def _format_batch_content(self, texts: List[str]) -> str:
  170. """格式化批量翻译内容"""
  171. lines = []
  172. for i, text in enumerate(texts, 1):
  173. lines.append(f"[{i}] {text}")
  174. return "\n".join(lines)
  175. def _parse_batch_response(self, response: str, expected_count: int) -> List[str]:
  176. """
  177. 解析批量翻译响应
  178. Args:
  179. response: AI 响应文本
  180. expected_count: 期望的翻译数量
  181. Returns:
  182. List[str]: 翻译结果列表
  183. """
  184. results = []
  185. lines = response.strip().split("\n")
  186. current_idx = None
  187. current_text = []
  188. for line in lines:
  189. # 尝试匹配 [数字] 格式
  190. stripped = line.strip()
  191. if stripped.startswith("[") and "]" in stripped:
  192. bracket_end = stripped.index("]")
  193. try:
  194. idx = int(stripped[1:bracket_end])
  195. # 保存之前的内容
  196. if current_idx is not None:
  197. results.append((current_idx, "\n".join(current_text).strip()))
  198. current_idx = idx
  199. current_text = [stripped[bracket_end + 1:].strip()]
  200. except ValueError:
  201. if current_idx is not None:
  202. current_text.append(line)
  203. else:
  204. if current_idx is not None:
  205. current_text.append(line)
  206. # 保存最后一条
  207. if current_idx is not None:
  208. results.append((current_idx, "\n".join(current_text).strip()))
  209. # 按索引排序并提取文本
  210. results.sort(key=lambda x: x[0])
  211. translated = [text for _, text in results]
  212. # 如果解析结果数量不匹配,尝试简单按行分割
  213. if len(translated) != expected_count:
  214. # 回退:按行分割(去除编号)
  215. translated = []
  216. for line in lines:
  217. stripped = line.strip()
  218. if stripped.startswith("[") and "]" in stripped:
  219. bracket_end = stripped.index("]")
  220. translated.append(stripped[bracket_end + 1:].strip())
  221. elif stripped:
  222. translated.append(stripped)
  223. # 确保返回正确数量
  224. while len(translated) < expected_count:
  225. translated.append("")
  226. return translated[:expected_count]
  227. def _call_ai(self, user_prompt: str) -> str:
  228. """调用 AI API(使用 LiteLLM)"""
  229. messages = []
  230. if self.system_prompt:
  231. messages.append({"role": "system", "content": self.system_prompt})
  232. messages.append({"role": "user", "content": user_prompt})
  233. return self.client.chat(messages)