translator.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. # coding=utf-8
  2. """
  3. AI 翻译器模块
  4. 对推送内容进行多语言翻译
  5. 基于 LiteLLM 统一接口,支持 100+ AI 提供商
  6. """
  7. from dataclasses import dataclass, field
  8. from typing import Any, Dict, List
  9. from trendradar.ai.client import AIClient
  10. from trendradar.ai.prompt_loader import load_prompt_template
  11. @dataclass
  12. class TranslationResult:
  13. """翻译结果"""
  14. translated_text: str = "" # 翻译后的文本
  15. original_text: str = "" # 原始文本
  16. success: bool = False # 是否成功
  17. error: str = "" # 错误信息
  18. @dataclass
  19. class BatchTranslationResult:
  20. """批量翻译结果"""
  21. results: List[TranslationResult] = field(default_factory=list)
  22. success_count: int = 0
  23. fail_count: int = 0
  24. total_count: int = 0
  25. prompt: str = "" # debug: 发送给 AI 的完整 prompt
  26. raw_response: str = "" # debug: AI 原始响应
  27. parsed_count: int = 0 # debug: AI 响应解析出的条目数
  28. class AITranslator:
  29. """AI 翻译器"""
  30. def __init__(self, translation_config: Dict[str, Any], ai_config: Dict[str, Any]):
  31. """
  32. 初始化 AI 翻译器
  33. Args:
  34. translation_config: AI 翻译配置 (AI_TRANSLATION)
  35. ai_config: AI 模型配置(LiteLLM 格式)
  36. """
  37. self.translation_config = translation_config
  38. self.ai_config = ai_config
  39. # 翻译配置
  40. self.enabled = translation_config.get("ENABLED", False)
  41. self.target_language = translation_config.get("LANGUAGE", "English")
  42. self.scope = translation_config.get("SCOPE", {"HOTLIST": True, "RSS": True, "STANDALONE": True})
  43. # 创建 AI 客户端(基于 LiteLLM)
  44. self.client = AIClient(ai_config)
  45. # 加载提示词模板
  46. self.system_prompt, self.user_prompt_template = load_prompt_template(
  47. translation_config.get("PROMPT_FILE", "ai_translation_prompt.txt"),
  48. label="翻译",
  49. )
  50. def translate(self, text: str) -> TranslationResult:
  51. """
  52. 翻译单条文本
  53. Args:
  54. text: 要翻译的文本
  55. Returns:
  56. TranslationResult: 翻译结果
  57. """
  58. result = TranslationResult(original_text=text)
  59. if not self.enabled:
  60. result.error = "翻译功能未启用"
  61. return result
  62. if not self.client.api_key:
  63. result.error = "未配置 AI API Key"
  64. return result
  65. if not text or not text.strip():
  66. result.translated_text = text
  67. result.success = True
  68. return result
  69. try:
  70. # 构建提示词
  71. user_prompt = self.user_prompt_template
  72. user_prompt = user_prompt.replace("{target_language}", self.target_language)
  73. user_prompt = user_prompt.replace("{content}", text)
  74. # 调用 AI API
  75. response = self._call_ai(user_prompt)
  76. result.translated_text = response.strip()
  77. result.success = True
  78. except Exception as e:
  79. error_type = type(e).__name__
  80. error_msg = str(e)
  81. if len(error_msg) > 100:
  82. error_msg = error_msg[:100] + "..."
  83. result.error = f"翻译失败 ({error_type}): {error_msg}"
  84. return result
  85. def translate_batch(self, texts: List[str]) -> BatchTranslationResult:
  86. """
  87. 批量翻译文本(单次 API 调用)
  88. Args:
  89. texts: 要翻译的文本列表
  90. Returns:
  91. BatchTranslationResult: 批量翻译结果
  92. """
  93. batch_result = BatchTranslationResult(total_count=len(texts))
  94. if not self.enabled:
  95. for text in texts:
  96. batch_result.results.append(TranslationResult(
  97. original_text=text,
  98. error="翻译功能未启用"
  99. ))
  100. batch_result.fail_count = len(texts)
  101. return batch_result
  102. if not self.client.api_key:
  103. for text in texts:
  104. batch_result.results.append(TranslationResult(
  105. original_text=text,
  106. error="未配置 AI API Key"
  107. ))
  108. batch_result.fail_count = len(texts)
  109. return batch_result
  110. if not texts:
  111. return batch_result
  112. # 过滤空文本
  113. non_empty_indices = []
  114. non_empty_texts = []
  115. for i, text in enumerate(texts):
  116. if text and text.strip():
  117. non_empty_indices.append(i)
  118. non_empty_texts.append(text)
  119. # 初始化结果列表
  120. for text in texts:
  121. batch_result.results.append(TranslationResult(original_text=text))
  122. # 空文本直接标记成功
  123. for i, text in enumerate(texts):
  124. if not text or not text.strip():
  125. batch_result.results[i].translated_text = text
  126. batch_result.results[i].success = True
  127. batch_result.success_count += 1
  128. if not non_empty_texts:
  129. return batch_result
  130. try:
  131. # 构建批量翻译内容(使用编号格式)
  132. batch_content = self._format_batch_content(non_empty_texts)
  133. # 构建提示词
  134. user_prompt = self.user_prompt_template
  135. user_prompt = user_prompt.replace("{target_language}", self.target_language)
  136. user_prompt = user_prompt.replace("{content}", batch_content)
  137. # 记录 debug 信息(包含完整的 system + user prompt)
  138. if self.system_prompt:
  139. batch_result.prompt = f"[system]\n{self.system_prompt}\n\n[user]\n{user_prompt}"
  140. else:
  141. batch_result.prompt = user_prompt
  142. # 调用 AI API
  143. response = self._call_ai(user_prompt)
  144. # 记录 AI 原始响应
  145. batch_result.raw_response = response
  146. # 解析批量翻译结果
  147. translated_texts, raw_parsed_count = self._parse_batch_response(response, len(non_empty_texts))
  148. batch_result.parsed_count = raw_parsed_count
  149. # 填充结果(跳过空翻译,避免用空字符串覆盖原始标题)
  150. for idx, translated in zip(non_empty_indices, translated_texts):
  151. if translated and translated.strip():
  152. batch_result.results[idx].translated_text = translated
  153. batch_result.results[idx].success = True
  154. batch_result.success_count += 1
  155. else:
  156. batch_result.results[idx].translated_text = batch_result.results[idx].original_text
  157. batch_result.results[idx].success = True
  158. batch_result.success_count += 1
  159. except Exception as e:
  160. error_msg = f"批量翻译失败: {type(e).__name__}: {str(e)[:100]}"
  161. for idx in non_empty_indices:
  162. batch_result.results[idx].error = error_msg
  163. batch_result.fail_count = len(non_empty_indices)
  164. return batch_result
  165. def _format_batch_content(self, texts: List[str]) -> str:
  166. """格式化批量翻译内容"""
  167. lines = []
  168. for i, text in enumerate(texts, 1):
  169. lines.append(f"[{i}] {text}")
  170. return "\n".join(lines)
  171. def _parse_batch_response(self, response: str, expected_count: int) -> tuple:
  172. """
  173. 解析批量翻译响应
  174. Args:
  175. response: AI 响应文本
  176. expected_count: 期望的翻译数量
  177. Returns:
  178. tuple: (翻译结果列表, AI 原始解析出的条目数)
  179. """
  180. results = []
  181. lines = response.strip().split("\n")
  182. current_idx = None
  183. current_text = []
  184. for line in lines:
  185. # 尝试匹配 [数字] 格式
  186. stripped = line.strip()
  187. if stripped.startswith("[") and "]" in stripped:
  188. bracket_end = stripped.index("]")
  189. try:
  190. idx = int(stripped[1:bracket_end])
  191. # 保存之前的内容
  192. if current_idx is not None:
  193. results.append((current_idx, "\n".join(current_text).strip()))
  194. current_idx = idx
  195. current_text = [stripped[bracket_end + 1:].strip()]
  196. except ValueError:
  197. if current_idx is not None:
  198. current_text.append(line)
  199. else:
  200. if current_idx is not None:
  201. current_text.append(line)
  202. # 保存最后一条
  203. if current_idx is not None:
  204. results.append((current_idx, "\n".join(current_text).strip()))
  205. # 按索引排序并提取文本
  206. results.sort(key=lambda x: x[0])
  207. translated = [text for _, text in results]
  208. raw_parsed_count = len(translated)
  209. # 如果解析结果数量不匹配,尝试简单按行分割
  210. if len(translated) != expected_count:
  211. # 回退:按行分割(去除编号)
  212. translated = []
  213. for line in lines:
  214. stripped = line.strip()
  215. if stripped.startswith("[") and "]" in stripped:
  216. bracket_end = stripped.index("]")
  217. translated.append(stripped[bracket_end + 1:].strip())
  218. elif stripped:
  219. translated.append(stripped)
  220. raw_parsed_count = len(translated)
  221. # 确保返回正确数量
  222. while len(translated) < expected_count:
  223. translated.append("")
  224. return translated[:expected_count], raw_parsed_count
  225. def _call_ai(self, user_prompt: str) -> str:
  226. """调用 AI API(使用 LiteLLM)"""
  227. messages = []
  228. if self.system_prompt:
  229. messages.append({"role": "system", "content": self.system_prompt})
  230. messages.append({"role": "user", "content": user_prompt})
  231. return self.client.chat(messages)