# crawler.yml

  name: Hot News Crawler

  on:
    # Allows the workflow to be started manually.
    workflow_dispatch:
    schedule:
      # ⚠️ Important: this workflow runs on resources provided by GitHub, and
      # every account has a limited quota. To avoid being flagged for abuse
      # (and risking account suspension), do not schedule runs more often
      # than once every 30 minutes.
      #
      # Cron examples (GitHub Actions evaluates cron in UTC; subtract 8 hours
      # from Beijing time):
      #   "0 * * * *"          every hour on the hour (actual start may drift)
      #   "*/30 * * * *"       every 30 minutes
      #   "*/30 0-14 * * *"    every 30 minutes, UTC 00:00-14:30
      #                        (Beijing 08:00-22:30)
      #   "0 0,6,12,18 * * *"  UTC 00:00, 06:00, 12:00, 18:00
      #                        (Beijing 08:00, 14:00, 20:00, 02:00 next day)
      - cron: '0 * * * *'

  # Write access to repository contents is required because the job pushes
  # commits back to the repository.
  permissions:
    contents: write

  # At most one run per ref: a newly started run cancels the one in progress.
  concurrency:
    group: 'crawler-${{ github.ref_name }}'
    cancel-in-progress: true
  jobs:
    # Single job: check out the repository, run main.py, then commit and push
    # whatever files changed in the working tree.
    crawl:
      runs-on: ubuntu-latest
      # Upper bound for the whole job, including the push retry loop below.
      timeout-minutes: 5
      steps:
        - name: Checkout repository
          uses: actions/checkout@v4
          with:
            # Shallow clone: only the tip commit is fetched.
            fetch-depth: 1
            clean: true
            token: ${{ secrets.GITHUB_TOKEN }}

        - name: Set up Python
          uses: actions/setup-python@v5
          with:
            # Quoted so YAML does not parse the version as the float 3.1.
            python-version: "3.10"
            cache: 'pip'

        - name: Install dependencies
          run: |
            python -m pip install --upgrade pip
            pip install -r requirements.txt

        # Fail fast with a clear message when a required input file is missing,
        # instead of letting main.py fail later.
        - name: Verify required files
          run: |
            echo "🔍 检查必需的配置文件..."
            for required_file in config/config.yaml config/frequency_words.txt; do
              if [ ! -f "$required_file" ]; then
                echo "❌ 错误: $required_file 文件不存在"
                exit 1
              fi
            done
            echo "✅ 配置文件检查通过"

        # Repository secrets are exposed to main.py as environment variables;
        # a secret that is not configured expands to an empty string.
        - name: Run crawler
          env:
            FEISHU_WEBHOOK_URL: ${{ secrets.FEISHU_WEBHOOK_URL }}
            TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
            TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
            DINGTALK_WEBHOOK_URL: ${{ secrets.DINGTALK_WEBHOOK_URL }}
            WEWORK_WEBHOOK_URL: ${{ secrets.WEWORK_WEBHOOK_URL }}
            WEWORK_MSG_TYPE: ${{ secrets.WEWORK_MSG_TYPE }}
            EMAIL_FROM: ${{ secrets.EMAIL_FROM }}
            EMAIL_PASSWORD: ${{ secrets.EMAIL_PASSWORD }}
            EMAIL_TO: ${{ secrets.EMAIL_TO }}
            EMAIL_SMTP_SERVER: ${{ secrets.EMAIL_SMTP_SERVER }}
            EMAIL_SMTP_PORT: ${{ secrets.EMAIL_SMTP_PORT }}
            NTFY_TOPIC: ${{ secrets.NTFY_TOPIC }}
            NTFY_SERVER_URL: ${{ secrets.NTFY_SERVER_URL }}
            NTFY_TOKEN: ${{ secrets.NTFY_TOKEN }}
            BARK_URL: ${{ secrets.BARK_URL }}
            SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
            # Environment values are strings; quoted so the YAML scalar is not a
            # boolean. GitHub documents GITHUB_ACTIONS as a default variable that
            # the runner sets itself; the explicit entry is kept as in the
            # original workflow.
            GITHUB_ACTIONS: "true"
          run: python main.py

        - name: Commit and push if changes
          env:
            # Target branch for pull/push. Falls back to the ref that triggered
            # the run if the event payload carries no repository.default_branch,
            # so the quoted shell expansions below never see an empty value.
            BRANCH_NAME: ${{ github.event.repository.default_branch || github.ref_name }}
          run: |
            git config --global user.name 'GitHub Actions'
            git config --global user.email 'actions@github.com'
            git add -A

            # After `git add -A` only the index matters: nothing staged means
            # there is nothing to commit.
            if git diff --staged --quiet; then
              echo "📭 No changes to commit"
              exit 0
            fi

            echo "📝 Committing changes..."
            TIMESTAMP=$(TZ=Asia/Shanghai date "+%Y-%m-%d %H:%M:%S")
            git commit -m "Auto update by GitHub Actions at $TIMESTAMP"

            echo "⬆️ Pushing changes with retry..."
            max_attempts=5
            for ((attempt = 1; attempt <= max_attempts; attempt++)); do
              if git pull --rebase origin "$BRANCH_NAME" && git push origin "$BRANCH_NAME"; then
                echo "✅ Successfully pushed on attempt $attempt"
                exit 0
              fi

              # A failed `pull --rebase` can leave a rebase in progress, which
              # would make every later attempt fail as well. Abort it; this is
              # a harmless no-op when the failure came from the push instead.
              git rebase --abort 2>/dev/null || true

              # No point waiting after the final attempt.
              if ((attempt == max_attempts)); then
                break
              fi

              # Linear back-off: 5s, 10s, 15s, 20s.
              echo "⚠️ Attempt $attempt failed, waiting $((attempt * 5)) seconds..."
              sleep $((attempt * 5))
            done

            echo "❌ Failed to push after 5 attempts"
            exit 1