#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Management tool for the news-crawler container (supercronic).
"""
import os
import signal
import subprocess
import sys
import time
from datetime import datetime
from pathlib import Path
  13. # Web 服务器配置
  14. WEBSERVER_PORT = int(os.environ.get("WEBSERVER_PORT", "8080"))
  15. WEBSERVER_DIR = "/app/output"
  16. WEBSERVER_PID_FILE = "/tmp/webserver.pid"
  17. def get_timestamp():
  18. """获取当前时间戳字符串"""
  19. return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
  20. def run_command(cmd, shell=True, capture_output=True):
  21. """执行系统命令"""
  22. try:
  23. result = subprocess.run(
  24. cmd, shell=shell, capture_output=capture_output, text=True
  25. )
  26. return result.returncode == 0, result.stdout, result.stderr
  27. except Exception as e:
  28. return False, "", str(e)
  29. def manual_run():
  30. """手动执行一次爬虫"""
  31. print("🔄 手动执行爬虫...")
  32. try:
  33. result = subprocess.run(
  34. ["python", "-m", "trendradar"], cwd="/app", capture_output=False, text=True
  35. )
  36. if result.returncode == 0:
  37. print("✅ 执行完成")
  38. else:
  39. print(f"❌ 执行失败,退出码: {result.returncode}")
  40. except Exception as e:
  41. print(f"❌ 执行出错: {e}")
  42. def parse_cron_schedule(cron_expr):
  43. """解析cron表达式并返回人类可读的描述"""
  44. if not cron_expr or cron_expr == "未设置":
  45. return "未设置"
  46. try:
  47. parts = cron_expr.strip().split()
  48. if len(parts) != 5:
  49. return f"原始表达式: {cron_expr}"
  50. minute, hour, day, month, weekday = parts
  51. # 分析分钟
  52. if minute == "*":
  53. minute_desc = "每分钟"
  54. elif minute.startswith("*/"):
  55. interval = minute[2:]
  56. minute_desc = f"每{interval}分钟"
  57. elif "," in minute:
  58. minute_desc = f"在第{minute}分钟"
  59. else:
  60. minute_desc = f"在第{minute}分钟"
  61. # 分析小时
  62. if hour == "*":
  63. hour_desc = "每小时"
  64. elif hour.startswith("*/"):
  65. interval = hour[2:]
  66. hour_desc = f"每{interval}小时"
  67. elif "," in hour:
  68. hour_desc = f"在{hour}点"
  69. else:
  70. hour_desc = f"在{hour}点"
  71. # 分析日期
  72. if day == "*":
  73. day_desc = "每天"
  74. elif day.startswith("*/"):
  75. interval = day[2:]
  76. day_desc = f"每{interval}天"
  77. else:
  78. day_desc = f"每月{day}号"
  79. # 分析月份
  80. if month == "*":
  81. month_desc = "每月"
  82. else:
  83. month_desc = f"在{month}月"
  84. # 分析星期
  85. weekday_names = {
  86. "0": "周日", "1": "周一", "2": "周二", "3": "周三",
  87. "4": "周四", "5": "周五", "6": "周六", "7": "周日"
  88. }
  89. if weekday == "*":
  90. weekday_desc = ""
  91. else:
  92. weekday_desc = f"在{weekday_names.get(weekday, weekday)}"
  93. # 组合描述
  94. if minute.startswith("*/") and hour == "*" and day == "*" and month == "*" and weekday == "*":
  95. # 简单的间隔模式,如 */30 * * * *
  96. return f"每{minute[2:]}分钟执行一次"
  97. elif hour != "*" and minute != "*" and day == "*" and month == "*" and weekday == "*":
  98. # 每天特定时间,如 0 9 * * *
  99. return f"每天{hour}:{minute.zfill(2)}执行"
  100. elif weekday != "*" and day == "*":
  101. # 每周特定时间
  102. return f"{weekday_desc}{hour}:{minute.zfill(2)}执行"
  103. else:
  104. # 复杂模式,显示详细信息
  105. desc_parts = [part for part in [month_desc, day_desc, weekday_desc, hour_desc, minute_desc] if part and part != "每月" and part != "每天" and part != "每小时"]
  106. if desc_parts:
  107. return " ".join(desc_parts) + "执行"
  108. else:
  109. return f"复杂表达式: {cron_expr}"
  110. except Exception as e:
  111. return f"解析失败: {cron_expr}"
  112. def show_status():
  113. """显示容器状态"""
  114. print("📊 容器状态:")
  115. # 检查 PID 1 状态
  116. supercronic_is_pid1 = False
  117. pid1_cmdline = ""
  118. try:
  119. with open('/proc/1/cmdline', 'r') as f:
  120. pid1_cmdline = f.read().replace('\x00', ' ').strip()
  121. print(f" 🔍 PID 1 进程: {pid1_cmdline}")
  122. if "supercronic" in pid1_cmdline.lower():
  123. print(" ✅ supercronic 正确运行为 PID 1")
  124. supercronic_is_pid1 = True
  125. else:
  126. print(" ❌ PID 1 不是 supercronic")
  127. print(f" 📋 实际的 PID 1: {pid1_cmdline}")
  128. except Exception as e:
  129. print(f" ❌ 无法读取 PID 1 信息: {e}")
  130. # 检查环境变量
  131. cron_schedule = os.environ.get("CRON_SCHEDULE", "未设置")
  132. run_mode = os.environ.get("RUN_MODE", "未设置")
  133. immediate_run = os.environ.get("IMMEDIATE_RUN", "未设置")
  134. print(f" ⚙️ 运行配置:")
  135. print(f" CRON_SCHEDULE: {cron_schedule}")
  136. # 解析并显示cron表达式的含义
  137. cron_description = parse_cron_schedule(cron_schedule)
  138. print(f" ⏰ 执行频率: {cron_description}")
  139. print(f" RUN_MODE: {run_mode}")
  140. print(f" IMMEDIATE_RUN: {immediate_run}")
  141. # 检查配置文件
  142. config_files = ["/app/config/config.yaml", "/app/config/frequency_words.txt"]
  143. print(" 📁 配置文件:")
  144. for file_path in config_files:
  145. if Path(file_path).exists():
  146. print(f" ✅ {Path(file_path).name}")
  147. else:
  148. print(f" ❌ {Path(file_path).name} 缺失")
  149. # 检查关键文件
  150. key_files = [
  151. ("/usr/local/bin/supercronic-linux-amd64", "supercronic二进制文件"),
  152. ("/usr/local/bin/supercronic", "supercronic软链接"),
  153. ("/tmp/crontab", "crontab文件"),
  154. ("/entrypoint.sh", "启动脚本")
  155. ]
  156. print(" 📂 关键文件检查:")
  157. for file_path, description in key_files:
  158. if Path(file_path).exists():
  159. print(f" ✅ {description}: 存在")
  160. # 对于crontab文件,显示内容
  161. if file_path == "/tmp/crontab":
  162. try:
  163. with open(file_path, 'r') as f:
  164. crontab_content = f.read().strip()
  165. print(f" 内容: {crontab_content}")
  166. except:
  167. pass
  168. else:
  169. print(f" ❌ {description}: 不存在")
  170. # 检查容器运行时间
  171. print(" ⏱️ 容器时间信息:")
  172. try:
  173. # 检查 PID 1 的启动时间
  174. with open('/proc/1/stat', 'r') as f:
  175. stat_content = f.read().strip().split()
  176. if len(stat_content) >= 22:
  177. # starttime 是第22个字段(索引21)
  178. starttime_ticks = int(stat_content[21])
  179. # 读取系统启动时间
  180. with open('/proc/stat', 'r') as stat_f:
  181. for line in stat_f:
  182. if line.startswith('btime'):
  183. boot_time = int(line.split()[1])
  184. break
  185. else:
  186. boot_time = 0
  187. # 读取系统时钟频率
  188. clock_ticks = os.sysconf(os.sysconf_names['SC_CLK_TCK'])
  189. if boot_time > 0:
  190. pid1_start_time = boot_time + (starttime_ticks / clock_ticks)
  191. current_time = time.time()
  192. uptime_seconds = int(current_time - pid1_start_time)
  193. uptime_minutes = uptime_seconds // 60
  194. uptime_hours = uptime_minutes // 60
  195. if uptime_hours > 0:
  196. print(f" PID 1 运行时间: {uptime_hours} 小时 {uptime_minutes % 60} 分钟")
  197. else:
  198. print(f" PID 1 运行时间: {uptime_minutes} 分钟 ({uptime_seconds} 秒)")
  199. else:
  200. print(f" PID 1 运行时间: 无法精确计算")
  201. else:
  202. print(" ❌ 无法解析 PID 1 统计信息")
  203. except Exception as e:
  204. print(f" ❌ 时间检查失败: {e}")
  205. # 状态总结和建议
  206. print(" 📊 状态总结:")
  207. if supercronic_is_pid1:
  208. print(" ✅ supercronic 正确运行为 PID 1")
  209. print(" ✅ 定时任务应该正常工作")
  210. # 显示当前的调度信息
  211. if cron_schedule != "未设置":
  212. print(f" ⏰ 当前调度: {cron_description}")
  213. # 提供一些常见的调度建议
  214. if "分钟" in cron_description and "每30分钟" not in cron_description and "每60分钟" not in cron_description:
  215. print(" 💡 频繁执行模式,适合实时监控")
  216. elif "小时" in cron_description:
  217. print(" 💡 按小时执行模式,适合定期汇总")
  218. elif "天" in cron_description:
  219. print(" 💡 每日执行模式,适合日报生成")
  220. print(" 💡 如果定时任务不执行,检查:")
  221. print(" • crontab 格式是否正确")
  222. print(" • 时区设置是否正确")
  223. print(" • 应用程序是否有错误")
  224. else:
  225. print(" ❌ supercronic 状态异常")
  226. if pid1_cmdline:
  227. print(f" 📋 当前 PID 1: {pid1_cmdline}")
  228. print(" 💡 建议操作:")
  229. print(" • 重启容器: docker restart trendradar")
  230. print(" • 检查容器日志: docker logs trendradar")
  231. # 显示日志检查建议
  232. print(" 📋 运行状态检查:")
  233. print(" • 查看完整容器日志: docker logs trendradar")
  234. print(" • 查看实时日志: docker logs -f trendradar")
  235. print(" • 手动执行测试: python manage.py run")
  236. print(" • 重启容器服务: docker restart trendradar")
  237. def show_config():
  238. """显示当前配置"""
  239. print("⚙️ 当前配置:")
  240. env_vars = [
  241. # 运行配置
  242. "CRON_SCHEDULE",
  243. "RUN_MODE",
  244. "IMMEDIATE_RUN",
  245. # 通知渠道
  246. "FEISHU_WEBHOOK_URL",
  247. "DINGTALK_WEBHOOK_URL",
  248. "WEWORK_WEBHOOK_URL",
  249. "WEWORK_MSG_TYPE",
  250. "TELEGRAM_BOT_TOKEN",
  251. "TELEGRAM_CHAT_ID",
  252. "NTFY_SERVER_URL",
  253. "NTFY_TOPIC",
  254. "NTFY_TOKEN",
  255. "BARK_URL",
  256. "SLACK_WEBHOOK_URL",
  257. # AI 分析配置
  258. "AI_ANALYSIS_ENABLED",
  259. "AI_API_KEY",
  260. "AI_PROVIDER",
  261. "AI_MODEL",
  262. "AI_BASE_URL",
  263. # 远程存储配置
  264. "S3_BUCKET_NAME",
  265. "S3_ACCESS_KEY_ID",
  266. "S3_ENDPOINT_URL",
  267. "S3_REGION",
  268. ]
  269. for var in env_vars:
  270. value = os.environ.get(var, "未设置")
  271. # 隐藏敏感信息
  272. if any(sensitive in var for sensitive in ["WEBHOOK", "TOKEN", "KEY", "SECRET"]):
  273. if value and value != "未设置":
  274. masked_value = value[:10] + "***" if len(value) > 10 else "***"
  275. print(f" {var}: {masked_value}")
  276. else:
  277. print(f" {var}: {value}")
  278. else:
  279. print(f" {var}: {value}")
  280. crontab_file = "/tmp/crontab"
  281. if Path(crontab_file).exists():
  282. print(" 📅 Crontab内容:")
  283. try:
  284. with open(crontab_file, "r") as f:
  285. content = f.read().strip()
  286. print(f" {content}")
  287. except Exception as e:
  288. print(f" 读取失败: {e}")
  289. else:
  290. print(" 📅 Crontab文件不存在")
  291. def show_files():
  292. """显示输出文件"""
  293. print("📁 输出文件:")
  294. output_dir = Path("/app/output")
  295. if not output_dir.exists():
  296. print(" 📭 输出目录不存在")
  297. return
  298. # 新结构:扁平化目录
  299. # - output/news/*.db
  300. # - output/rss/*.db
  301. # - output/txt/{date}/*.txt
  302. # - output/html/{date}/*.html
  303. # 检查 news 数据库
  304. news_dir = output_dir / "news"
  305. if news_dir.exists():
  306. db_files = sorted(news_dir.glob("*.db"), key=lambda x: x.name, reverse=True)
  307. if db_files:
  308. print(f" 💾 热榜数据库 (news/): {len(db_files)} 个")
  309. for db_file in db_files[:5]:
  310. mtime = time.ctime(db_file.stat().st_mtime)
  311. size_kb = db_file.stat().st_size // 1024
  312. print(f" 📀 {db_file.name} ({size_kb}KB, {mtime.split()[3][:5]})")
  313. if len(db_files) > 5:
  314. print(f" ... 还有 {len(db_files) - 5} 个")
  315. # 检查 RSS 数据库
  316. rss_dir = output_dir / "rss"
  317. if rss_dir.exists():
  318. db_files = sorted(rss_dir.glob("*.db"), key=lambda x: x.name, reverse=True)
  319. if db_files:
  320. print(f" 📰 RSS 数据库 (rss/): {len(db_files)} 个")
  321. for db_file in db_files[:5]:
  322. mtime = time.ctime(db_file.stat().st_mtime)
  323. size_kb = db_file.stat().st_size // 1024
  324. print(f" 📀 {db_file.name} ({size_kb}KB, {mtime.split()[3][:5]})")
  325. if len(db_files) > 5:
  326. print(f" ... 还有 {len(db_files) - 5} 个")
  327. # 检查 TXT 快照目录
  328. txt_dir = output_dir / "txt"
  329. if txt_dir.exists():
  330. date_dirs = sorted([d for d in txt_dir.iterdir() if d.is_dir()], reverse=True)
  331. if date_dirs:
  332. print(f" 📄 TXT 快照 (txt/): {len(date_dirs)} 天")
  333. for date_dir in date_dirs[:3]:
  334. txt_files = list(date_dir.glob("*.txt"))
  335. if txt_files:
  336. recent = sorted(txt_files, key=lambda x: x.stat().st_mtime, reverse=True)[0]
  337. mtime = time.ctime(recent.stat().st_mtime)
  338. print(f" 📅 {date_dir.name}: {len(txt_files)} 个文件 (最新: {mtime.split()[3][:5]})")
  339. # 检查 HTML 报告目录
  340. html_dir = output_dir / "html"
  341. if html_dir.exists():
  342. date_dirs = sorted([d for d in html_dir.iterdir() if d.is_dir()], reverse=True)
  343. if date_dirs:
  344. print(f" 🌐 HTML 报告 (html/): {len(date_dirs)} 天")
  345. for date_dir in date_dirs[:3]:
  346. html_files = list(date_dir.glob("*.html"))
  347. if html_files:
  348. recent = sorted(html_files, key=lambda x: x.stat().st_mtime, reverse=True)[0]
  349. mtime = time.ctime(recent.stat().st_mtime)
  350. print(f" 📅 {date_dir.name}: {len(html_files)} 个文件 (最新: {mtime.split()[3][:5]})")
  351. def show_logs():
  352. """显示实时日志"""
  353. print("📋 实时日志 (按 Ctrl+C 退出):")
  354. print("💡 提示: 这将显示 PID 1 进程的输出")
  355. try:
  356. # 尝试多种方法查看日志
  357. log_files = [
  358. "/proc/1/fd/1", # PID 1 的标准输出
  359. "/proc/1/fd/2", # PID 1 的标准错误
  360. ]
  361. for log_file in log_files:
  362. if Path(log_file).exists():
  363. print(f"📄 尝试读取: {log_file}")
  364. subprocess.run(["tail", "-f", log_file], check=True)
  365. break
  366. else:
  367. print("📋 无法找到标准日志文件,建议使用: docker logs trendradar")
  368. except KeyboardInterrupt:
  369. print("\n👋 退出日志查看")
  370. except Exception as e:
  371. print(f"❌ 查看日志失败: {e}")
  372. print("💡 建议使用: docker logs trendradar")
  373. def restart_supercronic():
  374. """重启supercronic进程"""
  375. print("🔄 重启supercronic...")
  376. print("⚠️ 注意: supercronic 是 PID 1,无法直接重启")
  377. # 检查当前 PID 1
  378. try:
  379. with open('/proc/1/cmdline', 'r') as f:
  380. pid1_cmdline = f.read().replace('\x00', ' ').strip()
  381. print(f" 🔍 当前 PID 1: {pid1_cmdline}")
  382. if "supercronic" in pid1_cmdline.lower():
  383. print(" ✅ PID 1 是 supercronic")
  384. print(" 💡 要重启 supercronic,需要重启整个容器:")
  385. print(" docker restart trendradar")
  386. else:
  387. print(" ❌ PID 1 不是 supercronic,这是异常状态")
  388. print(" 💡 建议重启容器以修复问题:")
  389. print(" docker restart trendradar")
  390. except Exception as e:
  391. print(f" ❌ 无法检查 PID 1: {e}")
  392. print(" 💡 建议重启容器: docker restart trendradar")
  393. def _read_proc_cmdline(pid: int) -> str:
  394. """读取进程 cmdline,失败时返回空字符串。"""
  395. proc_cmdline = Path(f"/proc/{pid}/cmdline")
  396. if not proc_cmdline.exists():
  397. return ""
  398. try:
  399. with open(proc_cmdline, "rb") as f:
  400. return f.read().replace(b"\x00", b" ").decode("utf-8", errors="ignore").strip()
  401. except Exception:
  402. return ""
  403. def _is_expected_webserver_process(pid: int) -> bool:
  404. """检查 pid 是否是当前端口的 http.server 进程。"""
  405. cmdline = _read_proc_cmdline(pid)
  406. if not cmdline:
  407. return False
  408. return "http.server" in cmdline and str(WEBSERVER_PORT) in cmdline
  409. def _terminate_webserver_process(pid: int, require_expected: bool = True) -> bool:
  410. """尝试终止 Web 服务器进程。
  411. require_expected=True 时,仅终止确认是 http.server 的进程,避免误杀。
  412. """
  413. try:
  414. os.kill(pid, 0)
  415. except OSError:
  416. return True
  417. if require_expected and not _is_expected_webserver_process(pid):
  418. print(f" ⚠️ PID {pid} 存在但并非 Web 服务器进程,跳过终止")
  419. return False
  420. try:
  421. os.kill(pid, signal.SIGTERM)
  422. time.sleep(0.5)
  423. try:
  424. os.kill(pid, 0)
  425. os.kill(pid, signal.SIGKILL)
  426. print(f" ⚠️ 强制停止 Web 服务器 (PID: {pid})")
  427. except OSError:
  428. print(f" ✅ Web 服务器已停止 (PID: {pid})")
  429. return True
  430. except OSError:
  431. return True
  432. def _is_webserver_running(pid: int) -> bool:
  433. """检查 Web 服务器进程是否真正在运行。"""
  434. try:
  435. os.kill(pid, 0)
  436. except OSError:
  437. return False
  438. if not _is_expected_webserver_process(pid):
  439. return False
  440. try:
  441. import urllib.request
  442. req = urllib.request.Request(f"http://127.0.0.1:{WEBSERVER_PORT}/", method="HEAD")
  443. urllib.request.urlopen(req, timeout=3)
  444. return True
  445. except Exception:
  446. try:
  447. time.sleep(1)
  448. import urllib.request
  449. req = urllib.request.Request(f"http://127.0.0.1:{WEBSERVER_PORT}/", method="HEAD")
  450. urllib.request.urlopen(req, timeout=3)
  451. return True
  452. except Exception:
  453. return False
  454. def _cleanup_stale_pid():
  455. """清理失效的 PID 文件"""
  456. if not Path(WEBSERVER_PID_FILE).exists():
  457. return False
  458. try:
  459. with open(WEBSERVER_PID_FILE, 'r') as f:
  460. old_pid = int(f.read().strip())
  461. os.remove(WEBSERVER_PID_FILE)
  462. print(f" 🧹 清理失效 PID 文件 (PID: {old_pid})")
  463. return True
  464. except Exception:
  465. return False
  466. def start_webserver():
  467. """启动 Web 服务器托管 output 目录"""
  468. print(f"🌐 启动 Web 服务器 (端口: {WEBSERVER_PORT})...")
  469. print(f" 🔒 安全提示:仅提供静态文件访问,限制在 {WEBSERVER_DIR} 目录")
  470. # 检查是否已经运行
  471. if Path(WEBSERVER_PID_FILE).exists():
  472. try:
  473. with open(WEBSERVER_PID_FILE, 'r') as f:
  474. old_pid = int(f.read().strip())
  475. # 使用增强的进程检查
  476. if _is_webserver_running(old_pid):
  477. print(f" ⚠️ Web 服务器已在运行 (PID: {old_pid})")
  478. print(f" 💡 访问: http://localhost:{WEBSERVER_PORT}")
  479. print(" 💡 停止服务: python manage.py stop_webserver")
  480. return
  481. # 进程异常时优先尝试终止旧进程,避免端口占用导致重启失败
  482. _terminate_webserver_process(old_pid, require_expected=True)
  483. _cleanup_stale_pid()
  484. print(f" ℹ️ 检测到失效的 PID 文件,已清理")
  485. except Exception as e:
  486. print(f" ⚠️ 清理旧的 PID 文件: {e}")
  487. _cleanup_stale_pid()
  488. # 检查目录是否存在
  489. if not Path(WEBSERVER_DIR).exists():
  490. print(f" ❌ 目录不存在: {WEBSERVER_DIR}")
  491. return
  492. try:
  493. # 启动 HTTP 服务器
  494. # 使用 --bind 绑定到 0.0.0.0 使容器内部可访问
  495. # 工作目录限制在 WEBSERVER_DIR,防止访问其他目录
  496. process = subprocess.Popen(
  497. [sys.executable, '-m', 'http.server', str(WEBSERVER_PORT), '--bind', '0.0.0.0'],
  498. cwd=WEBSERVER_DIR,
  499. stdout=subprocess.DEVNULL,
  500. stderr=subprocess.DEVNULL,
  501. start_new_session=True
  502. )
  503. # 等待一下确保服务器启动
  504. time.sleep(1)
  505. # 检查进程是否还在运行
  506. if process.poll() is None:
  507. # 保存 PID
  508. with open(WEBSERVER_PID_FILE, 'w') as f:
  509. f.write(str(process.pid))
  510. print(f" ✅ Web 服务器已启动 (PID: {process.pid})")
  511. print(f" 📁 服务目录: {WEBSERVER_DIR} (只读,仅静态文件)")
  512. print(f" 🌐 访问地址: http://localhost:{WEBSERVER_PORT}")
  513. print(f" 📄 首页: http://localhost:{WEBSERVER_PORT}/index.html")
  514. print(" 💡 停止服务: python manage.py stop_webserver")
  515. else:
  516. print(f" ❌ Web 服务器启动失败")
  517. except Exception as e:
  518. print(f" ❌ 启动失败: {e}")
  519. def stop_webserver():
  520. """停止 Web 服务器"""
  521. print("🛑 停止 Web 服务器...")
  522. if not Path(WEBSERVER_PID_FILE).exists():
  523. print(" ℹ️ Web 服务器未运行")
  524. return
  525. try:
  526. with open(WEBSERVER_PID_FILE, 'r') as f:
  527. pid = int(f.read().strip())
  528. _terminate_webserver_process(pid, require_expected=True)
  529. if Path(WEBSERVER_PID_FILE).exists():
  530. os.remove(WEBSERVER_PID_FILE)
  531. except Exception as e:
  532. print(f" ❌ 停止失败: {e}")
  533. # 尝试清理 PID 文件
  534. try:
  535. os.remove(WEBSERVER_PID_FILE)
  536. except:
  537. pass
  538. def webserver_status():
  539. """查看 Web 服务器状态"""
  540. print("🌐 Web 服务器状态:")
  541. if not Path(WEBSERVER_PID_FILE).exists():
  542. print(" ⭕ 未运行")
  543. print(f" 💡 启动服务: python manage.py start_webserver")
  544. return
  545. try:
  546. with open(WEBSERVER_PID_FILE, 'r') as f:
  547. pid = int(f.read().strip())
  548. # 使用增强的进程检查
  549. if _is_webserver_running(pid):
  550. print(f" ✅ 运行中 (PID: {pid})")
  551. print(f" 📁 服务目录: {WEBSERVER_DIR}")
  552. print(f" 🌐 访问地址: http://localhost:{WEBSERVER_PORT}")
  553. print(f" 📄 首页: http://localhost:{WEBSERVER_PORT}/index.html")
  554. print(" 💡 停止服务: python manage.py stop_webserver")
  555. else:
  556. print(f" ⭕ 未运行 (PID 文件存在但进程不可用)")
  557. _cleanup_stale_pid()
  558. print(" 💡 启动服务: python manage.py start_webserver")
  559. except Exception as e:
  560. print(f" ❌ 状态检查失败: {e}")
  561. def show_help():
  562. """显示帮助信息"""
  563. help_text = """
  564. 🐳 TrendRadar 容器管理工具
  565. 📋 命令列表:
  566. run - 手动执行一次爬虫
  567. status - 显示容器运行状态
  568. config - 显示当前配置
  569. files - 显示输出文件
  570. logs - 实时查看日志
  571. restart - 重启说明
  572. start_webserver - 启动 Web 服务器托管 output 目录
  573. stop_webserver - 停止 Web 服务器
  574. webserver_status - 查看 Web 服务器状态
  575. help - 显示此帮助
  576. 📖 使用示例:
  577. # 在容器中执行
  578. python manage.py run
  579. python manage.py status
  580. python manage.py logs
  581. python manage.py start_webserver
  582. # 在宿主机执行
  583. docker exec -it trendradar python manage.py run
  584. docker exec -it trendradar python manage.py status
  585. docker exec -it trendradar python manage.py start_webserver
  586. docker logs trendradar
  587. 💡 常用操作指南:
  588. 1. 检查运行状态: status
  589. - 查看 supercronic 是否为 PID 1
  590. - 检查配置文件和关键文件
  591. - 查看 cron 调度设置
  592. 2. 手动执行测试: run
  593. - 立即执行一次新闻爬取
  594. - 测试程序是否正常工作
  595. 3. 查看日志: logs
  596. - 实时监控运行情况
  597. - 也可使用: docker logs trendradar
  598. 4. 重启服务: restart
  599. - 由于 supercronic 是 PID 1,需要重启整个容器
  600. - 使用: docker restart trendradar
  601. 5. Web 服务器管理:
  602. - 启动: start_webserver
  603. - 停止: stop_webserver
  604. - 状态: webserver_status
  605. - 访问: http://localhost:8080
  606. """
  607. print(help_text)
  608. def main():
  609. if len(sys.argv) < 2:
  610. show_help()
  611. return
  612. command = sys.argv[1]
  613. commands = {
  614. "run": manual_run,
  615. "status": show_status,
  616. "config": show_config,
  617. "files": show_files,
  618. "logs": show_logs,
  619. "restart": restart_supercronic,
  620. "start_webserver": start_webserver,
  621. "stop_webserver": stop_webserver,
  622. "webserver_status": webserver_status,
  623. "help": show_help,
  624. }
  625. if command in commands:
  626. try:
  627. commands[command]()
  628. except KeyboardInterrupt:
  629. print("\n👋 操作已取消")
  630. except Exception as e:
  631. print(f"❌ 执行出错: {e}")
  632. else:
  633. print(f"❌ 未知命令: {command}")
  634. print("运行 'python manage.py help' 查看可用命令")
  635. if __name__ == "__main__":
  636. main()