translator.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310
  1. # coding=utf-8
  2. """
  3. AI 翻译器模块
  4. 对推送内容进行多语言翻译
  5. 基于 LiteLLM 统一接口,支持 100+ AI 提供商
  6. """
  7. from dataclasses import dataclass, field
  8. from pathlib import Path
  9. from typing import Any, Dict, List
  10. from trendradar.ai.client import AIClient
  11. @dataclass
  12. class TranslationResult:
  13. """翻译结果"""
  14. translated_text: str = "" # 翻译后的文本
  15. original_text: str = "" # 原始文本
  16. success: bool = False # 是否成功
  17. error: str = "" # 错误信息
  18. @dataclass
  19. class BatchTranslationResult:
  20. """批量翻译结果"""
  21. results: List[TranslationResult] = field(default_factory=list)
  22. success_count: int = 0
  23. fail_count: int = 0
  24. total_count: int = 0
  25. prompt: str = "" # debug: 发送给 AI 的完整 prompt
  26. raw_response: str = "" # debug: AI 原始响应
  27. parsed_count: int = 0 # debug: AI 响应解析出的条目数
  28. class AITranslator:
  29. """AI 翻译器"""
  30. def __init__(self, translation_config: Dict[str, Any], ai_config: Dict[str, Any]):
  31. """
  32. 初始化 AI 翻译器
  33. Args:
  34. translation_config: AI 翻译配置 (AI_TRANSLATION)
  35. ai_config: AI 模型配置(LiteLLM 格式)
  36. """
  37. self.translation_config = translation_config
  38. self.ai_config = ai_config
  39. # 翻译配置
  40. self.enabled = translation_config.get("ENABLED", False)
  41. self.target_language = translation_config.get("LANGUAGE", "English")
  42. self.scope = translation_config.get("SCOPE", {"HOTLIST": True, "RSS": True, "STANDALONE": True})
  43. # 创建 AI 客户端(基于 LiteLLM)
  44. self.client = AIClient(ai_config)
  45. # 加载提示词模板
  46. self.system_prompt, self.user_prompt_template = self._load_prompt_template(
  47. translation_config.get("PROMPT_FILE", "ai_translation_prompt.txt")
  48. )
  49. def _load_prompt_template(self, prompt_file: str) -> tuple:
  50. """加载提示词模板"""
  51. config_dir = Path(__file__).parent.parent.parent / "config"
  52. prompt_path = config_dir / prompt_file
  53. if not prompt_path.exists():
  54. print(f"[翻译] 提示词文件不存在: {prompt_path}")
  55. return "", ""
  56. content = prompt_path.read_text(encoding="utf-8")
  57. # 解析 [system] 和 [user] 部分
  58. system_prompt = ""
  59. user_prompt = ""
  60. if "[system]" in content and "[user]" in content:
  61. parts = content.split("[user]")
  62. system_part = parts[0]
  63. user_part = parts[1] if len(parts) > 1 else ""
  64. if "[system]" in system_part:
  65. system_prompt = system_part.split("[system]")[1].strip()
  66. user_prompt = user_part.strip()
  67. else:
  68. user_prompt = content
  69. return system_prompt, user_prompt
  70. def translate(self, text: str) -> TranslationResult:
  71. """
  72. 翻译单条文本
  73. Args:
  74. text: 要翻译的文本
  75. Returns:
  76. TranslationResult: 翻译结果
  77. """
  78. result = TranslationResult(original_text=text)
  79. if not self.enabled:
  80. result.error = "翻译功能未启用"
  81. return result
  82. if not self.client.api_key:
  83. result.error = "未配置 AI API Key"
  84. return result
  85. if not text or not text.strip():
  86. result.translated_text = text
  87. result.success = True
  88. return result
  89. try:
  90. # 构建提示词
  91. user_prompt = self.user_prompt_template
  92. user_prompt = user_prompt.replace("{target_language}", self.target_language)
  93. user_prompt = user_prompt.replace("{content}", text)
  94. # 调用 AI API
  95. response = self._call_ai(user_prompt)
  96. result.translated_text = response.strip()
  97. result.success = True
  98. except Exception as e:
  99. error_type = type(e).__name__
  100. error_msg = str(e)
  101. if len(error_msg) > 100:
  102. error_msg = error_msg[:100] + "..."
  103. result.error = f"翻译失败 ({error_type}): {error_msg}"
  104. return result
  105. def translate_batch(self, texts: List[str]) -> BatchTranslationResult:
  106. """
  107. 批量翻译文本(单次 API 调用)
  108. Args:
  109. texts: 要翻译的文本列表
  110. Returns:
  111. BatchTranslationResult: 批量翻译结果
  112. """
  113. batch_result = BatchTranslationResult(total_count=len(texts))
  114. if not self.enabled:
  115. for text in texts:
  116. batch_result.results.append(TranslationResult(
  117. original_text=text,
  118. error="翻译功能未启用"
  119. ))
  120. batch_result.fail_count = len(texts)
  121. return batch_result
  122. if not self.client.api_key:
  123. for text in texts:
  124. batch_result.results.append(TranslationResult(
  125. original_text=text,
  126. error="未配置 AI API Key"
  127. ))
  128. batch_result.fail_count = len(texts)
  129. return batch_result
  130. if not texts:
  131. return batch_result
  132. # 过滤空文本
  133. non_empty_indices = []
  134. non_empty_texts = []
  135. for i, text in enumerate(texts):
  136. if text and text.strip():
  137. non_empty_indices.append(i)
  138. non_empty_texts.append(text)
  139. # 初始化结果列表
  140. for text in texts:
  141. batch_result.results.append(TranslationResult(original_text=text))
  142. # 空文本直接标记成功
  143. for i, text in enumerate(texts):
  144. if not text or not text.strip():
  145. batch_result.results[i].translated_text = text
  146. batch_result.results[i].success = True
  147. batch_result.success_count += 1
  148. if not non_empty_texts:
  149. return batch_result
  150. try:
  151. # 构建批量翻译内容(使用编号格式)
  152. batch_content = self._format_batch_content(non_empty_texts)
  153. # 构建提示词
  154. user_prompt = self.user_prompt_template
  155. user_prompt = user_prompt.replace("{target_language}", self.target_language)
  156. user_prompt = user_prompt.replace("{content}", batch_content)
  157. # 记录 debug 信息(包含完整的 system + user prompt)
  158. if self.system_prompt:
  159. batch_result.prompt = f"[system]\n{self.system_prompt}\n\n[user]\n{user_prompt}"
  160. else:
  161. batch_result.prompt = user_prompt
  162. # 调用 AI API
  163. response = self._call_ai(user_prompt)
  164. # 记录 AI 原始响应
  165. batch_result.raw_response = response
  166. # 解析批量翻译结果
  167. translated_texts, raw_parsed_count = self._parse_batch_response(response, len(non_empty_texts))
  168. batch_result.parsed_count = raw_parsed_count
  169. # 填充结果
  170. for idx, translated in zip(non_empty_indices, translated_texts):
  171. batch_result.results[idx].translated_text = translated
  172. batch_result.results[idx].success = True
  173. batch_result.success_count += 1
  174. except Exception as e:
  175. error_msg = f"批量翻译失败: {type(e).__name__}: {str(e)[:100]}"
  176. for idx in non_empty_indices:
  177. batch_result.results[idx].error = error_msg
  178. batch_result.fail_count = len(non_empty_indices)
  179. return batch_result
  180. def _format_batch_content(self, texts: List[str]) -> str:
  181. """格式化批量翻译内容"""
  182. lines = []
  183. for i, text in enumerate(texts, 1):
  184. lines.append(f"[{i}] {text}")
  185. return "\n".join(lines)
  186. def _parse_batch_response(self, response: str, expected_count: int) -> tuple:
  187. """
  188. 解析批量翻译响应
  189. Args:
  190. response: AI 响应文本
  191. expected_count: 期望的翻译数量
  192. Returns:
  193. tuple: (翻译结果列表, AI 原始解析出的条目数)
  194. """
  195. results = []
  196. lines = response.strip().split("\n")
  197. current_idx = None
  198. current_text = []
  199. for line in lines:
  200. # 尝试匹配 [数字] 格式
  201. stripped = line.strip()
  202. if stripped.startswith("[") and "]" in stripped:
  203. bracket_end = stripped.index("]")
  204. try:
  205. idx = int(stripped[1:bracket_end])
  206. # 保存之前的内容
  207. if current_idx is not None:
  208. results.append((current_idx, "\n".join(current_text).strip()))
  209. current_idx = idx
  210. current_text = [stripped[bracket_end + 1:].strip()]
  211. except ValueError:
  212. if current_idx is not None:
  213. current_text.append(line)
  214. else:
  215. if current_idx is not None:
  216. current_text.append(line)
  217. # 保存最后一条
  218. if current_idx is not None:
  219. results.append((current_idx, "\n".join(current_text).strip()))
  220. # 按索引排序并提取文本
  221. results.sort(key=lambda x: x[0])
  222. translated = [text for _, text in results]
  223. raw_parsed_count = len(translated)
  224. # 如果解析结果数量不匹配,尝试简单按行分割
  225. if len(translated) != expected_count:
  226. # 回退:按行分割(去除编号)
  227. translated = []
  228. for line in lines:
  229. stripped = line.strip()
  230. if stripped.startswith("[") and "]" in stripped:
  231. bracket_end = stripped.index("]")
  232. translated.append(stripped[bracket_end + 1:].strip())
  233. elif stripped:
  234. translated.append(stripped)
  235. raw_parsed_count = len(translated)
  236. # 确保返回正确数量
  237. while len(translated) < expected_count:
  238. translated.append("")
  239. return translated[:expected_count], raw_parsed_count
  240. def _call_ai(self, user_prompt: str) -> str:
  241. """调用 AI API(使用 LiteLLM)"""
  242. messages = []
  243. if self.system_prompt:
  244. messages.append({"role": "system", "content": self.system_prompt})
  245. messages.append({"role": "user", "content": user_prompt})
  246. return self.client.chat(messages)