manage.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 新闻爬虫容器管理工具 - supercronic
  5. """
  6. import os
  7. import sys
  8. import subprocess
  9. import time
  10. from pathlib import Path
  11. def run_command(cmd, shell=True, capture_output=True):
  12. """执行系统命令"""
  13. try:
  14. result = subprocess.run(
  15. cmd, shell=shell, capture_output=capture_output, text=True
  16. )
  17. return result.returncode == 0, result.stdout, result.stderr
  18. except Exception as e:
  19. return False, "", str(e)
  20. def manual_run():
  21. """手动执行一次爬虫"""
  22. print("🔄 手动执行爬虫...")
  23. try:
  24. result = subprocess.run(
  25. ["python", "main.py"], cwd="/app", capture_output=False, text=True
  26. )
  27. if result.returncode == 0:
  28. print("✅ 执行完成")
  29. else:
  30. print(f"❌ 执行失败,退出码: {result.returncode}")
  31. except Exception as e:
  32. print(f"❌ 执行出错: {e}")
  33. def parse_cron_schedule(cron_expr):
  34. """解析cron表达式并返回人类可读的描述"""
  35. if not cron_expr or cron_expr == "未设置":
  36. return "未设置"
  37. try:
  38. parts = cron_expr.strip().split()
  39. if len(parts) != 5:
  40. return f"原始表达式: {cron_expr}"
  41. minute, hour, day, month, weekday = parts
  42. # 分析分钟
  43. if minute == "*":
  44. minute_desc = "每分钟"
  45. elif minute.startswith("*/"):
  46. interval = minute[2:]
  47. minute_desc = f"每{interval}分钟"
  48. elif "," in minute:
  49. minute_desc = f"在第{minute}分钟"
  50. else:
  51. minute_desc = f"在第{minute}分钟"
  52. # 分析小时
  53. if hour == "*":
  54. hour_desc = "每小时"
  55. elif hour.startswith("*/"):
  56. interval = hour[2:]
  57. hour_desc = f"每{interval}小时"
  58. elif "," in hour:
  59. hour_desc = f"在{hour}点"
  60. else:
  61. hour_desc = f"在{hour}点"
  62. # 分析日期
  63. if day == "*":
  64. day_desc = "每天"
  65. elif day.startswith("*/"):
  66. interval = day[2:]
  67. day_desc = f"每{interval}天"
  68. else:
  69. day_desc = f"每月{day}号"
  70. # 分析月份
  71. if month == "*":
  72. month_desc = "每月"
  73. else:
  74. month_desc = f"在{month}月"
  75. # 分析星期
  76. weekday_names = {
  77. "0": "周日", "1": "周一", "2": "周二", "3": "周三",
  78. "4": "周四", "5": "周五", "6": "周六", "7": "周日"
  79. }
  80. if weekday == "*":
  81. weekday_desc = ""
  82. else:
  83. weekday_desc = f"在{weekday_names.get(weekday, weekday)}"
  84. # 组合描述
  85. if minute.startswith("*/") and hour == "*" and day == "*" and month == "*" and weekday == "*":
  86. # 简单的间隔模式,如 */30 * * * *
  87. return f"每{minute[2:]}分钟执行一次"
  88. elif hour != "*" and minute != "*" and day == "*" and month == "*" and weekday == "*":
  89. # 每天特定时间,如 0 9 * * *
  90. return f"每天{hour}:{minute.zfill(2)}执行"
  91. elif weekday != "*" and day == "*":
  92. # 每周特定时间
  93. return f"{weekday_desc}{hour}:{minute.zfill(2)}执行"
  94. else:
  95. # 复杂模式,显示详细信息
  96. desc_parts = [part for part in [month_desc, day_desc, weekday_desc, hour_desc, minute_desc] if part and part != "每月" and part != "每天" and part != "每小时"]
  97. if desc_parts:
  98. return " ".join(desc_parts) + "执行"
  99. else:
  100. return f"复杂表达式: {cron_expr}"
  101. except Exception as e:
  102. return f"解析失败: {cron_expr}"
  103. def show_status():
  104. """显示容器状态"""
  105. print("📊 容器状态:")
  106. # 检查 PID 1 状态
  107. supercronic_is_pid1 = False
  108. pid1_cmdline = ""
  109. try:
  110. with open('/proc/1/cmdline', 'r') as f:
  111. pid1_cmdline = f.read().replace('\x00', ' ').strip()
  112. print(f" 🔍 PID 1 进程: {pid1_cmdline}")
  113. if "supercronic" in pid1_cmdline.lower():
  114. print(" ✅ supercronic 正确运行为 PID 1")
  115. supercronic_is_pid1 = True
  116. else:
  117. print(" ❌ PID 1 不是 supercronic")
  118. print(f" 📋 实际的 PID 1: {pid1_cmdline}")
  119. except Exception as e:
  120. print(f" ❌ 无法读取 PID 1 信息: {e}")
  121. # 检查环境变量
  122. cron_schedule = os.environ.get("CRON_SCHEDULE", "未设置")
  123. run_mode = os.environ.get("RUN_MODE", "未设置")
  124. immediate_run = os.environ.get("IMMEDIATE_RUN", "未设置")
  125. print(f" ⚙️ 运行配置:")
  126. print(f" CRON_SCHEDULE: {cron_schedule}")
  127. # 解析并显示cron表达式的含义
  128. cron_description = parse_cron_schedule(cron_schedule)
  129. print(f" ⏰ 执行频率: {cron_description}")
  130. print(f" RUN_MODE: {run_mode}")
  131. print(f" IMMEDIATE_RUN: {immediate_run}")
  132. # 检查配置文件
  133. config_files = ["/app/config/config.yaml", "/app/config/frequency_words.txt"]
  134. print(" 📁 配置文件:")
  135. for file_path in config_files:
  136. if Path(file_path).exists():
  137. print(f" ✅ {Path(file_path).name}")
  138. else:
  139. print(f" ❌ {Path(file_path).name} 缺失")
  140. # 检查关键文件
  141. key_files = [
  142. ("/usr/local/bin/supercronic-linux-amd64", "supercronic二进制文件"),
  143. ("/usr/local/bin/supercronic", "supercronic软链接"),
  144. ("/tmp/crontab", "crontab文件"),
  145. ("/entrypoint.sh", "启动脚本")
  146. ]
  147. print(" 📂 关键文件检查:")
  148. for file_path, description in key_files:
  149. if Path(file_path).exists():
  150. print(f" ✅ {description}: 存在")
  151. # 对于crontab文件,显示内容
  152. if file_path == "/tmp/crontab":
  153. try:
  154. with open(file_path, 'r') as f:
  155. crontab_content = f.read().strip()
  156. print(f" 内容: {crontab_content}")
  157. except:
  158. pass
  159. else:
  160. print(f" ❌ {description}: 不存在")
  161. # 检查容器运行时间
  162. print(" ⏱️ 容器时间信息:")
  163. try:
  164. # 检查 PID 1 的启动时间
  165. with open('/proc/1/stat', 'r') as f:
  166. stat_content = f.read().strip().split()
  167. if len(stat_content) >= 22:
  168. # starttime 是第22个字段(索引21)
  169. starttime_ticks = int(stat_content[21])
  170. # 读取系统启动时间
  171. with open('/proc/stat', 'r') as stat_f:
  172. for line in stat_f:
  173. if line.startswith('btime'):
  174. boot_time = int(line.split()[1])
  175. break
  176. else:
  177. boot_time = 0
  178. # 读取系统时钟频率
  179. clock_ticks = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
  180. if boot_time > 0:
  181. pid1_start_time = boot_time + (starttime_ticks / clock_ticks)
  182. current_time = time.time()
  183. uptime_seconds = int(current_time - pid1_start_time)
  184. uptime_minutes = uptime_seconds // 60
  185. uptime_hours = uptime_minutes // 60
  186. if uptime_hours > 0:
  187. print(f" PID 1 运行时间: {uptime_hours} 小时 {uptime_minutes % 60} 分钟")
  188. else:
  189. print(f" PID 1 运行时间: {uptime_minutes} 分钟 ({uptime_seconds} 秒)")
  190. else:
  191. print(f" PID 1 运行时间: 无法精确计算")
  192. else:
  193. print(" ❌ 无法解析 PID 1 统计信息")
  194. except Exception as e:
  195. print(f" ❌ 时间检查失败: {e}")
  196. # 状态总结和建议
  197. print(" 📊 状态总结:")
  198. if supercronic_is_pid1:
  199. print(" ✅ supercronic 正确运行为 PID 1")
  200. print(" ✅ 定时任务应该正常工作")
  201. # 显示当前的调度信息
  202. if cron_schedule != "未设置":
  203. print(f" ⏰ 当前调度: {cron_description}")
  204. # 提供一些常见的调度建议
  205. if "分钟" in cron_description and "每30分钟" not in cron_description and "每60分钟" not in cron_description:
  206. print(" 💡 频繁执行模式,适合实时监控")
  207. elif "小时" in cron_description:
  208. print(" 💡 按小时执行模式,适合定期汇总")
  209. elif "天" in cron_description:
  210. print(" 💡 每日执行模式,适合日报生成")
  211. print(" 💡 如果定时任务不执行,检查:")
  212. print(" • crontab 格式是否正确")
  213. print(" • 时区设置是否正确")
  214. print(" • 应用程序是否有错误")
  215. else:
  216. print(" ❌ supercronic 状态异常")
  217. if pid1_cmdline:
  218. print(f" 📋 当前 PID 1: {pid1_cmdline}")
  219. print(" 💡 建议操作:")
  220. print(" • 重启容器: docker restart trend-radar")
  221. print(" • 检查容器日志: docker logs trend-radar")
  222. # 显示日志检查建议
  223. print(" 📋 运行状态检查:")
  224. print(" • 查看完整容器日志: docker logs trend-radar")
  225. print(" • 查看实时日志: docker logs -f trend-radar")
  226. print(" • 手动执行测试: python manage.py run")
  227. print(" • 重启容器服务: docker restart trend-radar")
  228. def show_config():
  229. """显示当前配置"""
  230. print("⚙️ 当前配置:")
  231. env_vars = [
  232. "CRON_SCHEDULE",
  233. "RUN_MODE",
  234. "IMMEDIATE_RUN",
  235. "FEISHU_WEBHOOK_URL",
  236. "DINGTALK_WEBHOOK_URL",
  237. "WEWORK_WEBHOOK_URL",
  238. "TELEGRAM_BOT_TOKEN",
  239. "TELEGRAM_CHAT_ID",
  240. "CONFIG_PATH",
  241. "FREQUENCY_WORDS_PATH",
  242. ]
  243. for var in env_vars:
  244. value = os.environ.get(var, "未设置")
  245. # 隐藏敏感信息
  246. if any(sensitive in var for sensitive in ["WEBHOOK", "TOKEN", "KEY"]):
  247. if value and value != "未设置":
  248. masked_value = value[:10] + "***" if len(value) > 10 else "***"
  249. print(f" {var}: {masked_value}")
  250. else:
  251. print(f" {var}: {value}")
  252. else:
  253. print(f" {var}: {value}")
  254. crontab_file = "/tmp/crontab"
  255. if Path(crontab_file).exists():
  256. print(" 📅 Crontab内容:")
  257. try:
  258. with open(crontab_file, "r") as f:
  259. content = f.read().strip()
  260. print(f" {content}")
  261. except Exception as e:
  262. print(f" 读取失败: {e}")
  263. else:
  264. print(" 📅 Crontab文件不存在")
  265. def show_files():
  266. """显示输出文件"""
  267. print("📁 输出文件:")
  268. output_dir = Path("/app/output")
  269. if not output_dir.exists():
  270. print(" 📭 输出目录不存在")
  271. return
  272. # 显示最近的文件
  273. date_dirs = sorted([d for d in output_dir.iterdir() if d.is_dir()], reverse=True)
  274. if not date_dirs:
  275. print(" 📭 输出目录为空")
  276. return
  277. # 显示最近2天的文件
  278. for date_dir in date_dirs[:2]:
  279. print(f" 📅 {date_dir.name}:")
  280. for subdir in ["html", "txt"]:
  281. sub_path = date_dir / subdir
  282. if sub_path.exists():
  283. files = list(sub_path.glob("*"))
  284. if files:
  285. recent_files = sorted(
  286. files, key=lambda x: x.stat().st_mtime, reverse=True
  287. )[:3]
  288. print(f" 📂 {subdir}: {len(files)} 个文件")
  289. for file in recent_files:
  290. mtime = time.ctime(file.stat().st_mtime)
  291. size_kb = file.stat().st_size // 1024
  292. print(
  293. f" 📄 {file.name} ({size_kb}KB, {mtime.split()[3][:5]})"
  294. )
  295. else:
  296. print(f" 📂 {subdir}: 空")
  297. def show_logs():
  298. """显示实时日志"""
  299. print("📋 实时日志 (按 Ctrl+C 退出):")
  300. print("💡 提示: 这将显示 PID 1 进程的输出")
  301. try:
  302. # 尝试多种方法查看日志
  303. log_files = [
  304. "/proc/1/fd/1", # PID 1 的标准输出
  305. "/proc/1/fd/2", # PID 1 的标准错误
  306. ]
  307. for log_file in log_files:
  308. if Path(log_file).exists():
  309. print(f"📄 尝试读取: {log_file}")
  310. subprocess.run(["tail", "-f", log_file], check=True)
  311. break
  312. else:
  313. print("📋 无法找到标准日志文件,建议使用: docker logs trend-radar")
  314. except KeyboardInterrupt:
  315. print("\n👋 退出日志查看")
  316. except Exception as e:
  317. print(f"❌ 查看日志失败: {e}")
  318. print("💡 建议使用: docker logs trend-radar")
  319. def restart_supercronic():
  320. """重启supercronic进程"""
  321. print("🔄 重启supercronic...")
  322. print("⚠️ 注意: supercronic 是 PID 1,无法直接重启")
  323. # 检查当前 PID 1
  324. try:
  325. with open('/proc/1/cmdline', 'r') as f:
  326. pid1_cmdline = f.read().replace('\x00', ' ').strip()
  327. print(f" 🔍 当前 PID 1: {pid1_cmdline}")
  328. if "supercronic" in pid1_cmdline.lower():
  329. print(" ✅ PID 1 是 supercronic")
  330. print(" 💡 要重启 supercronic,需要重启整个容器:")
  331. print(" docker restart trend-radar")
  332. else:
  333. print(" ❌ PID 1 不是 supercronic,这是异常状态")
  334. print(" 💡 建议重启容器以修复问题:")
  335. print(" docker restart trend-radar")
  336. except Exception as e:
  337. print(f" ❌ 无法检查 PID 1: {e}")
  338. print(" 💡 建议重启容器: docker restart trend-radar")
  339. def show_help():
  340. """显示帮助信息"""
  341. help_text = """
  342. 🐳 TrendRadar 容器管理工具
  343. 📋 命令列表:
  344. run - 手动执行一次爬虫
  345. status - 显示容器运行状态
  346. config - 显示当前配置
  347. files - 显示输出文件
  348. logs - 实时查看日志
  349. restart - 重启说明
  350. help - 显示此帮助
  351. 📖 使用示例:
  352. # 在容器中执行
  353. python manage.py run
  354. python manage.py status
  355. python manage.py logs
  356. # 在宿主机执行
  357. docker exec -it trend-radar python manage.py run
  358. docker exec -it trend-radar python manage.py status
  359. docker logs trend-radar
  360. 💡 常用操作指南:
  361. 1. 检查运行状态: status
  362. - 查看 supercronic 是否为 PID 1
  363. - 检查配置文件和关键文件
  364. - 查看 cron 调度设置
  365. 2. 手动执行测试: run
  366. - 立即执行一次新闻爬取
  367. - 测试程序是否正常工作
  368. 3. 查看日志: logs
  369. - 实时监控运行情况
  370. - 也可使用: docker logs trend-radar
  371. 4. 重启服务: restart
  372. - 由于 supercronic 是 PID 1,需要重启整个容器
  373. - 使用: docker restart trend-radar
  374. """
  375. print(help_text)
  376. def main():
  377. if len(sys.argv) < 2:
  378. show_help()
  379. return
  380. command = sys.argv[1]
  381. commands = {
  382. "run": manual_run,
  383. "status": show_status,
  384. "config": show_config,
  385. "files": show_files,
  386. "logs": show_logs,
  387. "restart": restart_supercronic,
  388. "help": show_help,
  389. }
  390. if command in commands:
  391. try:
  392. commands[command]()
  393. except KeyboardInterrupt:
  394. print("\n👋 操作已取消")
  395. except Exception as e:
  396. print(f"❌ 执行出错: {e}")
  397. else:
  398. print(f"❌ 未知命令: {command}")
  399. print("运行 'python manage.py help' 查看可用命令")
  400. if __name__ == "__main__":
  401. main()