# crawler.yml - "Hot News Crawler" workflow.
#
# Runs `python main.py` on a cron schedule (or on manual dispatch) and then
# commits and pushes whatever files the run added or changed in the checkout.
name: Hot News Crawler

on:
  schedule:
    # IMPORTANT: pushes are performed with GitHub-provided resources, and each
    # account has a limited quota. To avoid being flagged for abuse (and
    # risking an account ban), do not schedule runs more often than every
    # 30 minutes.
    #
    # Cron examples (GitHub Actions uses UTC; subtract 8 hours from Beijing time):
    # - "0 * * * *"          # every hour on the hour (actual start time drifts)
    # - "*/30 * * * *"       # every 30 minutes
    # - "*/30 0-14 * * *"    # every 30 min, 00:00-14:00 UTC (08:00-22:00 Beijing)
    # - "0 0,6,12,18 * * *"  # 00:00/06:00/12:00/18:00 UTC (08:00/14:00/20:00/02:00+1d Beijing)
    - cron: "0 * * * *"
  workflow_dispatch:

# One run per ref at a time; starting a new run cancels the one in progress.
concurrency:
  group: crawler-${{ github.ref_name }}
  cancel-in-progress: true

# Write access to repository contents is needed by the final push step.
permissions:
  contents: write

jobs:
  crawl:
    runs-on: ubuntu-latest
    timeout-minutes: 30
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Full history, so the later `git pull --rebase` has a common base.
          fetch-depth: 0
          clean: true

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted on purpose: an unquoted 3.10 is parsed as the float 3.1.
          python-version: "3.10"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      # Fail fast with a clear message when a required config file is missing.
      - name: Verify required files
        run: |
          echo "🔍 检查必需的配置文件..."
          if [ ! -f config/config.yaml ]; then
            echo "❌ 错误: config/config.yaml 文件不存在"
            exit 1
          fi
          if [ ! -f config/frequency_words.txt ]; then
            echo "❌ 错误: config/frequency_words.txt 文件不存在"
            exit 1
          fi
          echo "✅ 配置文件检查通过"

      - name: Run crawler
        env:
          # Values are read from repository secrets and exposed to main.py as
          # environment variables. A secret that is not defined expands to an
          # empty string.
          FEISHU_WEBHOOK_URL: ${{ secrets.FEISHU_WEBHOOK_URL }}
          TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
          TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
          DINGTALK_WEBHOOK_URL: ${{ secrets.DINGTALK_WEBHOOK_URL }}
          WEWORK_WEBHOOK_URL: ${{ secrets.WEWORK_WEBHOOK_URL }}
          WEWORK_MSG_TYPE: ${{ secrets.WEWORK_MSG_TYPE }}
          EMAIL_FROM: ${{ secrets.EMAIL_FROM }}
          EMAIL_PASSWORD: ${{ secrets.EMAIL_PASSWORD }}
          EMAIL_TO: ${{ secrets.EMAIL_TO }}
          EMAIL_SMTP_SERVER: ${{ secrets.EMAIL_SMTP_SERVER }}
          EMAIL_SMTP_PORT: ${{ secrets.EMAIL_SMTP_PORT }}
          NTFY_TOPIC: ${{ secrets.NTFY_TOPIC }}
          NTFY_SERVER_URL: ${{ secrets.NTFY_SERVER_URL }}
          NTFY_TOKEN: ${{ secrets.NTFY_TOKEN }}
          BARK_URL: ${{ secrets.BARK_URL }}
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
          # Environment values are strings; quote so this is not a YAML boolean.
          GITHUB_ACTIONS: "true"
        run: python main.py

      - name: Commit and push if changes
        env:
          # Branch to sync with and push to. Falls back to the triggering ref
          # in case the event payload carries no repository object.
          BRANCH_NAME: ${{ github.event.repository.default_branch || github.ref_name }}
        run: |
          git config --global user.name 'GitHub Actions'
          git config --global user.email 'actions@github.com'

          echo "🔄 Syncing with remote (branch: $BRANCH_NAME)..."
          git fetch origin "$BRANCH_NAME"

          # Nothing was produced by this run: skip the stash round-trip.
          if [ -z "$(git status --porcelain)" ]; then
            echo "📭 No changes to commit"
            exit 0
          fi

          # Set this run's changes (tracked and untracked) aside
          git stash push --include-untracked
          # Move to the latest remote state
          git reset --hard "origin/$BRANCH_NAME"
          # Re-apply this run's changes. A failure here (e.g. a conflict) must
          # stop the step; continuing would commit conflict markers or drop
          # the changes silently.
          if ! git stash pop; then
            echo "❌ Failed to re-apply local changes on top of origin/$BRANCH_NAME"
            exit 1
          fi

          git add -A
          # Everything is staged now, so the staged diff alone is decisive.
          if git diff --staged --quiet; then
            echo "📭 No changes to commit"
            exit 0
          fi

          echo "📝 Committing changes..."
          TIMESTAMP=$(TZ=Asia/Shanghai date "+%Y-%m-%d %H:%M:%S")
          git commit -m "Auto update by GitHub Actions at $TIMESTAMP"

          echo "⬆️ Pushing changes with retry..."
          for i in {1..5}; do
            if git pull --rebase origin "$BRANCH_NAME" && git push origin "$BRANCH_NAME"; then
              echo "✅ Successfully pushed on attempt $i"
              exit 0
            fi
            # A failed rebase leaves the repository mid-rebase, which would
            # make every later attempt fail immediately; reset that state.
            git rebase --abort 2>/dev/null || true
            echo "⚠️ Attempt $i failed, waiting $((i*3)) seconds..."
            sleep $((i * 3))
          done

          echo "❌ Failed to push after 5 attempts"
          exit 1