manage.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 新闻爬虫容器管理工具 - supercronic
  5. """
  6. import os
  7. import sys
  8. import subprocess
  9. import time
  10. import signal
  11. from pathlib import Path
  12. # Web 服务器配置
  13. WEBSERVER_PORT = int(os.environ.get("WEBSERVER_PORT", "8080"))
  14. WEBSERVER_DIR = "/app/output"
  15. WEBSERVER_PID_FILE = "/tmp/webserver.pid"
  16. def run_command(cmd, shell=True, capture_output=True):
  17. """执行系统命令"""
  18. try:
  19. result = subprocess.run(
  20. cmd, shell=shell, capture_output=capture_output, text=True
  21. )
  22. return result.returncode == 0, result.stdout, result.stderr
  23. except Exception as e:
  24. return False, "", str(e)
  25. def manual_run():
  26. """手动执行一次爬虫"""
  27. print("🔄 手动执行爬虫...")
  28. try:
  29. result = subprocess.run(
  30. ["python", "-m", "trendradar"], cwd="/app", capture_output=False, text=True
  31. )
  32. if result.returncode == 0:
  33. print("✅ 执行完成")
  34. else:
  35. print(f"❌ 执行失败,退出码: {result.returncode}")
  36. except Exception as e:
  37. print(f"❌ 执行出错: {e}")
  38. def parse_cron_schedule(cron_expr):
  39. """解析cron表达式并返回人类可读的描述"""
  40. if not cron_expr or cron_expr == "未设置":
  41. return "未设置"
  42. try:
  43. parts = cron_expr.strip().split()
  44. if len(parts) != 5:
  45. return f"原始表达式: {cron_expr}"
  46. minute, hour, day, month, weekday = parts
  47. # 分析分钟
  48. if minute == "*":
  49. minute_desc = "每分钟"
  50. elif minute.startswith("*/"):
  51. interval = minute[2:]
  52. minute_desc = f"每{interval}分钟"
  53. elif "," in minute:
  54. minute_desc = f"在第{minute}分钟"
  55. else:
  56. minute_desc = f"在第{minute}分钟"
  57. # 分析小时
  58. if hour == "*":
  59. hour_desc = "每小时"
  60. elif hour.startswith("*/"):
  61. interval = hour[2:]
  62. hour_desc = f"每{interval}小时"
  63. elif "," in hour:
  64. hour_desc = f"在{hour}点"
  65. else:
  66. hour_desc = f"在{hour}点"
  67. # 分析日期
  68. if day == "*":
  69. day_desc = "每天"
  70. elif day.startswith("*/"):
  71. interval = day[2:]
  72. day_desc = f"每{interval}天"
  73. else:
  74. day_desc = f"每月{day}号"
  75. # 分析月份
  76. if month == "*":
  77. month_desc = "每月"
  78. else:
  79. month_desc = f"在{month}月"
  80. # 分析星期
  81. weekday_names = {
  82. "0": "周日", "1": "周一", "2": "周二", "3": "周三",
  83. "4": "周四", "5": "周五", "6": "周六", "7": "周日"
  84. }
  85. if weekday == "*":
  86. weekday_desc = ""
  87. else:
  88. weekday_desc = f"在{weekday_names.get(weekday, weekday)}"
  89. # 组合描述
  90. if minute.startswith("*/") and hour == "*" and day == "*" and month == "*" and weekday == "*":
  91. # 简单的间隔模式,如 */30 * * * *
  92. return f"每{minute[2:]}分钟执行一次"
  93. elif hour != "*" and minute != "*" and day == "*" and month == "*" and weekday == "*":
  94. # 每天特定时间,如 0 9 * * *
  95. return f"每天{hour}:{minute.zfill(2)}执行"
  96. elif weekday != "*" and day == "*":
  97. # 每周特定时间
  98. return f"{weekday_desc}{hour}:{minute.zfill(2)}执行"
  99. else:
  100. # 复杂模式,显示详细信息
  101. desc_parts = [part for part in [month_desc, day_desc, weekday_desc, hour_desc, minute_desc] if part and part != "每月" and part != "每天" and part != "每小时"]
  102. if desc_parts:
  103. return " ".join(desc_parts) + "执行"
  104. else:
  105. return f"复杂表达式: {cron_expr}"
  106. except Exception as e:
  107. return f"解析失败: {cron_expr}"
  108. def show_status():
  109. """显示容器状态"""
  110. print("📊 容器状态:")
  111. # 检查 PID 1 状态
  112. supercronic_is_pid1 = False
  113. pid1_cmdline = ""
  114. try:
  115. with open('/proc/1/cmdline', 'r') as f:
  116. pid1_cmdline = f.read().replace('\x00', ' ').strip()
  117. print(f" 🔍 PID 1 进程: {pid1_cmdline}")
  118. if "supercronic" in pid1_cmdline.lower():
  119. print(" ✅ supercronic 正确运行为 PID 1")
  120. supercronic_is_pid1 = True
  121. else:
  122. print(" ❌ PID 1 不是 supercronic")
  123. print(f" 📋 实际的 PID 1: {pid1_cmdline}")
  124. except Exception as e:
  125. print(f" ❌ 无法读取 PID 1 信息: {e}")
  126. # 检查环境变量
  127. cron_schedule = os.environ.get("CRON_SCHEDULE", "未设置")
  128. run_mode = os.environ.get("RUN_MODE", "未设置")
  129. immediate_run = os.environ.get("IMMEDIATE_RUN", "未设置")
  130. print(f" ⚙️ 运行配置:")
  131. print(f" CRON_SCHEDULE: {cron_schedule}")
  132. # 解析并显示cron表达式的含义
  133. cron_description = parse_cron_schedule(cron_schedule)
  134. print(f" ⏰ 执行频率: {cron_description}")
  135. print(f" RUN_MODE: {run_mode}")
  136. print(f" IMMEDIATE_RUN: {immediate_run}")
  137. # 检查配置文件
  138. config_files = ["/app/config/config.yaml", "/app/config/frequency_words.txt"]
  139. print(" 📁 配置文件:")
  140. for file_path in config_files:
  141. if Path(file_path).exists():
  142. print(f" ✅ {Path(file_path).name}")
  143. else:
  144. print(f" ❌ {Path(file_path).name} 缺失")
  145. # 检查关键文件
  146. key_files = [
  147. ("/usr/local/bin/supercronic-linux-amd64", "supercronic二进制文件"),
  148. ("/usr/local/bin/supercronic", "supercronic软链接"),
  149. ("/tmp/crontab", "crontab文件"),
  150. ("/entrypoint.sh", "启动脚本")
  151. ]
  152. print(" 📂 关键文件检查:")
  153. for file_path, description in key_files:
  154. if Path(file_path).exists():
  155. print(f" ✅ {description}: 存在")
  156. # 对于crontab文件,显示内容
  157. if file_path == "/tmp/crontab":
  158. try:
  159. with open(file_path, 'r') as f:
  160. crontab_content = f.read().strip()
  161. print(f" 内容: {crontab_content}")
  162. except:
  163. pass
  164. else:
  165. print(f" ❌ {description}: 不存在")
  166. # 检查容器运行时间
  167. print(" ⏱️ 容器时间信息:")
  168. try:
  169. # 检查 PID 1 的启动时间
  170. with open('/proc/1/stat', 'r') as f:
  171. stat_content = f.read().strip().split()
  172. if len(stat_content) >= 22:
  173. # starttime 是第22个字段(索引21)
  174. starttime_ticks = int(stat_content[21])
  175. # 读取系统启动时间
  176. with open('/proc/stat', 'r') as stat_f:
  177. for line in stat_f:
  178. if line.startswith('btime'):
  179. boot_time = int(line.split()[1])
  180. break
  181. else:
  182. boot_time = 0
  183. # 读取系统时钟频率
  184. clock_ticks = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
  185. if boot_time > 0:
  186. pid1_start_time = boot_time + (starttime_ticks / clock_ticks)
  187. current_time = time.time()
  188. uptime_seconds = int(current_time - pid1_start_time)
  189. uptime_minutes = uptime_seconds // 60
  190. uptime_hours = uptime_minutes // 60
  191. if uptime_hours > 0:
  192. print(f" PID 1 运行时间: {uptime_hours} 小时 {uptime_minutes % 60} 分钟")
  193. else:
  194. print(f" PID 1 运行时间: {uptime_minutes} 分钟 ({uptime_seconds} 秒)")
  195. else:
  196. print(f" PID 1 运行时间: 无法精确计算")
  197. else:
  198. print(" ❌ 无法解析 PID 1 统计信息")
  199. except Exception as e:
  200. print(f" ❌ 时间检查失败: {e}")
  201. # 状态总结和建议
  202. print(" 📊 状态总结:")
  203. if supercronic_is_pid1:
  204. print(" ✅ supercronic 正确运行为 PID 1")
  205. print(" ✅ 定时任务应该正常工作")
  206. # 显示当前的调度信息
  207. if cron_schedule != "未设置":
  208. print(f" ⏰ 当前调度: {cron_description}")
  209. # 提供一些常见的调度建议
  210. if "分钟" in cron_description and "每30分钟" not in cron_description and "每60分钟" not in cron_description:
  211. print(" 💡 频繁执行模式,适合实时监控")
  212. elif "小时" in cron_description:
  213. print(" 💡 按小时执行模式,适合定期汇总")
  214. elif "天" in cron_description:
  215. print(" 💡 每日执行模式,适合日报生成")
  216. print(" 💡 如果定时任务不执行,检查:")
  217. print(" • crontab 格式是否正确")
  218. print(" • 时区设置是否正确")
  219. print(" • 应用程序是否有错误")
  220. else:
  221. print(" ❌ supercronic 状态异常")
  222. if pid1_cmdline:
  223. print(f" 📋 当前 PID 1: {pid1_cmdline}")
  224. print(" 💡 建议操作:")
  225. print(" • 重启容器: docker restart trendradar")
  226. print(" • 检查容器日志: docker logs trendradar")
  227. # 显示日志检查建议
  228. print(" 📋 运行状态检查:")
  229. print(" • 查看完整容器日志: docker logs trendradar")
  230. print(" • 查看实时日志: docker logs -f trendradar")
  231. print(" • 手动执行测试: python manage.py run")
  232. print(" • 重启容器服务: docker restart trendradar")
  233. def show_config():
  234. """显示当前配置"""
  235. print("⚙️ 当前配置:")
  236. env_vars = [
  237. # 运行配置
  238. "CRON_SCHEDULE",
  239. "RUN_MODE",
  240. "IMMEDIATE_RUN",
  241. # 核心配置
  242. "ENABLE_CRAWLER",
  243. "ENABLE_NOTIFICATION",
  244. "REPORT_MODE",
  245. "DISPLAY_MODE",
  246. # 通知渠道
  247. "FEISHU_WEBHOOK_URL",
  248. "DINGTALK_WEBHOOK_URL",
  249. "WEWORK_WEBHOOK_URL",
  250. "WEWORK_MSG_TYPE",
  251. "TELEGRAM_BOT_TOKEN",
  252. "TELEGRAM_CHAT_ID",
  253. "NTFY_SERVER_URL",
  254. "NTFY_TOPIC",
  255. "NTFY_TOKEN",
  256. "BARK_URL",
  257. "SLACK_WEBHOOK_URL",
  258. # AI 分析配置
  259. "AI_ANALYSIS_ENABLED",
  260. "AI_API_KEY",
  261. "AI_PROVIDER",
  262. "AI_MODEL",
  263. "AI_BASE_URL",
  264. # 远程存储配置
  265. "S3_BUCKET_NAME",
  266. "S3_ACCESS_KEY_ID",
  267. "S3_ENDPOINT_URL",
  268. "S3_REGION",
  269. ]
  270. for var in env_vars:
  271. value = os.environ.get(var, "未设置")
  272. # 隐藏敏感信息
  273. if any(sensitive in var for sensitive in ["WEBHOOK", "TOKEN", "KEY", "SECRET"]):
  274. if value and value != "未设置":
  275. masked_value = value[:10] + "***" if len(value) > 10 else "***"
  276. print(f" {var}: {masked_value}")
  277. else:
  278. print(f" {var}: {value}")
  279. else:
  280. print(f" {var}: {value}")
  281. crontab_file = "/tmp/crontab"
  282. if Path(crontab_file).exists():
  283. print(" 📅 Crontab内容:")
  284. try:
  285. with open(crontab_file, "r") as f:
  286. content = f.read().strip()
  287. print(f" {content}")
  288. except Exception as e:
  289. print(f" 读取失败: {e}")
  290. else:
  291. print(" 📅 Crontab文件不存在")
  292. def show_files():
  293. """显示输出文件"""
  294. print("📁 输出文件:")
  295. output_dir = Path("/app/output")
  296. if not output_dir.exists():
  297. print(" 📭 输出目录不存在")
  298. return
  299. # 新结构:扁平化目录
  300. # - output/news/*.db
  301. # - output/rss/*.db
  302. # - output/txt/{date}/*.txt
  303. # - output/html/{date}/*.html
  304. # 检查 news 数据库
  305. news_dir = output_dir / "news"
  306. if news_dir.exists():
  307. db_files = sorted(news_dir.glob("*.db"), key=lambda x: x.name, reverse=True)
  308. if db_files:
  309. print(f" 💾 热榜数据库 (news/): {len(db_files)} 个")
  310. for db_file in db_files[:5]:
  311. mtime = time.ctime(db_file.stat().st_mtime)
  312. size_kb = db_file.stat().st_size // 1024
  313. print(f" 📀 {db_file.name} ({size_kb}KB, {mtime.split()[3][:5]})")
  314. if len(db_files) > 5:
  315. print(f" ... 还有 {len(db_files) - 5} 个")
  316. # 检查 RSS 数据库
  317. rss_dir = output_dir / "rss"
  318. if rss_dir.exists():
  319. db_files = sorted(rss_dir.glob("*.db"), key=lambda x: x.name, reverse=True)
  320. if db_files:
  321. print(f" 📰 RSS 数据库 (rss/): {len(db_files)} 个")
  322. for db_file in db_files[:5]:
  323. mtime = time.ctime(db_file.stat().st_mtime)
  324. size_kb = db_file.stat().st_size // 1024
  325. print(f" 📀 {db_file.name} ({size_kb}KB, {mtime.split()[3][:5]})")
  326. if len(db_files) > 5:
  327. print(f" ... 还有 {len(db_files) - 5} 个")
  328. # 检查 TXT 快照目录
  329. txt_dir = output_dir / "txt"
  330. if txt_dir.exists():
  331. date_dirs = sorted([d for d in txt_dir.iterdir() if d.is_dir()], reverse=True)
  332. if date_dirs:
  333. print(f" 📄 TXT 快照 (txt/): {len(date_dirs)} 天")
  334. for date_dir in date_dirs[:3]:
  335. txt_files = list(date_dir.glob("*.txt"))
  336. if txt_files:
  337. recent = sorted(txt_files, key=lambda x: x.stat().st_mtime, reverse=True)[0]
  338. mtime = time.ctime(recent.stat().st_mtime)
  339. print(f" 📅 {date_dir.name}: {len(txt_files)} 个文件 (最新: {mtime.split()[3][:5]})")
  340. # 检查 HTML 报告目录
  341. html_dir = output_dir / "html"
  342. if html_dir.exists():
  343. date_dirs = sorted([d for d in html_dir.iterdir() if d.is_dir()], reverse=True)
  344. if date_dirs:
  345. print(f" 🌐 HTML 报告 (html/): {len(date_dirs)} 天")
  346. for date_dir in date_dirs[:3]:
  347. html_files = list(date_dir.glob("*.html"))
  348. if html_files:
  349. recent = sorted(html_files, key=lambda x: x.stat().st_mtime, reverse=True)[0]
  350. mtime = time.ctime(recent.stat().st_mtime)
  351. print(f" 📅 {date_dir.name}: {len(html_files)} 个文件 (最新: {mtime.split()[3][:5]})")
  352. def show_logs():
  353. """显示实时日志"""
  354. print("📋 实时日志 (按 Ctrl+C 退出):")
  355. print("💡 提示: 这将显示 PID 1 进程的输出")
  356. try:
  357. # 尝试多种方法查看日志
  358. log_files = [
  359. "/proc/1/fd/1", # PID 1 的标准输出
  360. "/proc/1/fd/2", # PID 1 的标准错误
  361. ]
  362. for log_file in log_files:
  363. if Path(log_file).exists():
  364. print(f"📄 尝试读取: {log_file}")
  365. subprocess.run(["tail", "-f", log_file], check=True)
  366. break
  367. else:
  368. print("📋 无法找到标准日志文件,建议使用: docker logs trendradar")
  369. except KeyboardInterrupt:
  370. print("\n👋 退出日志查看")
  371. except Exception as e:
  372. print(f"❌ 查看日志失败: {e}")
  373. print("💡 建议使用: docker logs trendradar")
  374. def restart_supercronic():
  375. """重启supercronic进程"""
  376. print("🔄 重启supercronic...")
  377. print("⚠️ 注意: supercronic 是 PID 1,无法直接重启")
  378. # 检查当前 PID 1
  379. try:
  380. with open('/proc/1/cmdline', 'r') as f:
  381. pid1_cmdline = f.read().replace('\x00', ' ').strip()
  382. print(f" 🔍 当前 PID 1: {pid1_cmdline}")
  383. if "supercronic" in pid1_cmdline.lower():
  384. print(" ✅ PID 1 是 supercronic")
  385. print(" 💡 要重启 supercronic,需要重启整个容器:")
  386. print(" docker restart trendradar")
  387. else:
  388. print(" ❌ PID 1 不是 supercronic,这是异常状态")
  389. print(" 💡 建议重启容器以修复问题:")
  390. print(" docker restart trendradar")
  391. except Exception as e:
  392. print(f" ❌ 无法检查 PID 1: {e}")
  393. print(" 💡 建议重启容器: docker restart trendradar")
  394. def start_webserver():
  395. """启动 Web 服务器托管 output 目录"""
  396. print(f"🌐 启动 Web 服务器 (端口: {WEBSERVER_PORT})...")
  397. print(f" 🔒 安全提示:仅提供静态文件访问,限制在 {WEBSERVER_DIR} 目录")
  398. # 检查是否已经运行
  399. if Path(WEBSERVER_PID_FILE).exists():
  400. try:
  401. with open(WEBSERVER_PID_FILE, 'r') as f:
  402. old_pid = int(f.read().strip())
  403. try:
  404. os.kill(old_pid, 0) # 检查进程是否存在
  405. print(f" ⚠️ Web 服务器已在运行 (PID: {old_pid})")
  406. print(f" 💡 访问: http://localhost:{WEBSERVER_PORT}")
  407. print(" 💡 停止服务: python manage.py stop_webserver")
  408. return
  409. except OSError:
  410. # 进程不存在,删除旧的 PID 文件
  411. os.remove(WEBSERVER_PID_FILE)
  412. except Exception as e:
  413. print(f" ⚠️ 清理旧的 PID 文件: {e}")
  414. try:
  415. os.remove(WEBSERVER_PID_FILE)
  416. except:
  417. pass
  418. # 检查目录是否存在
  419. if not Path(WEBSERVER_DIR).exists():
  420. print(f" ❌ 目录不存在: {WEBSERVER_DIR}")
  421. return
  422. try:
  423. # 启动 HTTP 服务器
  424. # 使用 --bind 绑定到 0.0.0.0 使容器内部可访问
  425. # 工作目录限制在 WEBSERVER_DIR,防止访问其他目录
  426. process = subprocess.Popen(
  427. [sys.executable, '-m', 'http.server', str(WEBSERVER_PORT), '--bind', '0.0.0.0'],
  428. cwd=WEBSERVER_DIR,
  429. stdout=subprocess.DEVNULL,
  430. stderr=subprocess.DEVNULL,
  431. start_new_session=True
  432. )
  433. # 等待一下确保服务器启动
  434. time.sleep(1)
  435. # 检查进程是否还在运行
  436. if process.poll() is None:
  437. # 保存 PID
  438. with open(WEBSERVER_PID_FILE, 'w') as f:
  439. f.write(str(process.pid))
  440. print(f" ✅ Web 服务器已启动 (PID: {process.pid})")
  441. print(f" 📁 服务目录: {WEBSERVER_DIR} (只读,仅静态文件)")
  442. print(f" 🌐 访问地址: http://localhost:{WEBSERVER_PORT}")
  443. print(f" 📄 首页: http://localhost:{WEBSERVER_PORT}/index.html")
  444. print(" 💡 停止服务: python manage.py stop_webserver")
  445. else:
  446. print(f" ❌ Web 服务器启动失败")
  447. except Exception as e:
  448. print(f" ❌ 启动失败: {e}")
  449. def stop_webserver():
  450. """停止 Web 服务器"""
  451. print("🛑 停止 Web 服务器...")
  452. if not Path(WEBSERVER_PID_FILE).exists():
  453. print(" ℹ️ Web 服务器未运行")
  454. return
  455. try:
  456. with open(WEBSERVER_PID_FILE, 'r') as f:
  457. pid = int(f.read().strip())
  458. try:
  459. # 尝试终止进程
  460. os.kill(pid, signal.SIGTERM)
  461. time.sleep(0.5)
  462. # 检查进程是否已终止
  463. try:
  464. os.kill(pid, 0)
  465. # 进程还在,强制杀死
  466. os.kill(pid, signal.SIGKILL)
  467. print(f" ⚠️ 强制停止 Web 服务器 (PID: {pid})")
  468. except OSError:
  469. print(f" ✅ Web 服务器已停止 (PID: {pid})")
  470. except OSError as e:
  471. if e.errno == 3: # No such process
  472. print(f" ℹ️ 进程已不存在 (PID: {pid})")
  473. else:
  474. raise
  475. # 删除 PID 文件
  476. os.remove(WEBSERVER_PID_FILE)
  477. except Exception as e:
  478. print(f" ❌ 停止失败: {e}")
  479. # 尝试清理 PID 文件
  480. try:
  481. os.remove(WEBSERVER_PID_FILE)
  482. except:
  483. pass
  484. def webserver_status():
  485. """查看 Web 服务器状态"""
  486. print("🌐 Web 服务器状态:")
  487. if not Path(WEBSERVER_PID_FILE).exists():
  488. print(" ⭕ 未运行")
  489. print(f" 💡 启动服务: python manage.py start_webserver")
  490. return
  491. try:
  492. with open(WEBSERVER_PID_FILE, 'r') as f:
  493. pid = int(f.read().strip())
  494. try:
  495. os.kill(pid, 0) # 检查进程是否存在
  496. print(f" ✅ 运行中 (PID: {pid})")
  497. print(f" 📁 服务目录: {WEBSERVER_DIR}")
  498. print(f" 🌐 访问地址: http://localhost:{WEBSERVER_PORT}")
  499. print(f" 📄 首页: http://localhost:{WEBSERVER_PORT}/index.html")
  500. print(" 💡 停止服务: python manage.py stop_webserver")
  501. except OSError:
  502. print(f" ⭕ 未运行 (PID 文件存在但进程不存在)")
  503. os.remove(WEBSERVER_PID_FILE)
  504. print(" 💡 启动服务: python manage.py start_webserver")
  505. except Exception as e:
  506. print(f" ❌ 状态检查失败: {e}")
  507. def show_help():
  508. """显示帮助信息"""
  509. help_text = """
  510. 🐳 TrendRadar 容器管理工具
  511. 📋 命令列表:
  512. run - 手动执行一次爬虫
  513. status - 显示容器运行状态
  514. config - 显示当前配置
  515. files - 显示输出文件
  516. logs - 实时查看日志
  517. restart - 重启说明
  518. start_webserver - 启动 Web 服务器托管 output 目录
  519. stop_webserver - 停止 Web 服务器
  520. webserver_status - 查看 Web 服务器状态
  521. help - 显示此帮助
  522. 📖 使用示例:
  523. # 在容器中执行
  524. python manage.py run
  525. python manage.py status
  526. python manage.py logs
  527. python manage.py start_webserver
  528. # 在宿主机执行
  529. docker exec -it trendradar python manage.py run
  530. docker exec -it trendradar python manage.py status
  531. docker exec -it trendradar python manage.py start_webserver
  532. docker logs trendradar
  533. 💡 常用操作指南:
  534. 1. 检查运行状态: status
  535. - 查看 supercronic 是否为 PID 1
  536. - 检查配置文件和关键文件
  537. - 查看 cron 调度设置
  538. 2. 手动执行测试: run
  539. - 立即执行一次新闻爬取
  540. - 测试程序是否正常工作
  541. 3. 查看日志: logs
  542. - 实时监控运行情况
  543. - 也可使用: docker logs trendradar
  544. 4. 重启服务: restart
  545. - 由于 supercronic 是 PID 1,需要重启整个容器
  546. - 使用: docker restart trendradar
  547. 5. Web 服务器管理:
  548. - 启动: start_webserver
  549. - 停止: stop_webserver
  550. - 状态: webserver_status
  551. - 访问: http://localhost:8080
  552. """
  553. print(help_text)
  554. def main():
  555. if len(sys.argv) < 2:
  556. show_help()
  557. return
  558. command = sys.argv[1]
  559. commands = {
  560. "run": manual_run,
  561. "status": show_status,
  562. "config": show_config,
  563. "files": show_files,
  564. "logs": show_logs,
  565. "restart": restart_supercronic,
  566. "start_webserver": start_webserver,
  567. "stop_webserver": stop_webserver,
  568. "webserver_status": webserver_status,
  569. "help": show_help,
  570. }
  571. if command in commands:
  572. try:
  573. commands[command]()
  574. except KeyboardInterrupt:
  575. print("\n👋 操作已取消")
  576. except Exception as e:
  577. print(f"❌ 执行出错: {e}")
  578. else:
  579. print(f"❌ 未知命令: {command}")
  580. print("运行 'python manage.py help' 查看可用命令")
  581. if __name__ == "__main__":
  582. main()