# crawler.yml

  1. name: Hot News Crawler
  2. on:
  3. schedule:
  4. - cron: "0 * * * *"
  5. workflow_dispatch:
  6. concurrency:
  7. group: crawler-main-branch
  8. cancel-in-progress: true
  9. permissions:
  10. contents: write
  11. jobs:
  12. crawl:
  13. runs-on: ubuntu-latest
  14. timeout-minutes: 30
  15. steps:
  16. - name: Checkout repository
  17. uses: actions/checkout@v3
  18. with:
  19. ref: main
  20. fetch-depth: 0
  21. clean: true
  22. - name: Set up Python
  23. uses: actions/setup-python@v4
  24. with:
  25. python-version: "3.10"
  26. - name: Install dependencies
  27. run: |
  28. python -m pip install --upgrade pip
  29. pip install -r requirements.txt
  30. - name: Verify required files
  31. run: |
  32. echo "🔍 检查必需的配置文件..."
  33. if [ ! -f config/config.yaml ]; then
  34. echo "❌ 错误: config/config.yaml 文件不存在"
  35. exit 1
  36. fi
  37. if [ ! -f config/frequency_words.txt ]; then
  38. echo "❌ 错误: config/frequency_words.txt 文件不存在"
  39. exit 1
  40. fi
  41. echo "✅ 配置文件检查通过"
  42. - name: Run crawler
  43. env:
  44. FEISHU_WEBHOOK_URL: ${{ secrets.FEISHU_WEBHOOK_URL }}
  45. TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
  46. TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
  47. DINGTALK_WEBHOOK_URL: ${{ secrets.DINGTALK_WEBHOOK_URL }}
  48. WEWORK_WEBHOOK_URL: ${{ secrets.WEWORK_WEBHOOK_URL }}
  49. WEWORK_MSG_TYPE: ${{ secrets.WEWORK_MSG_TYPE }}
  50. EMAIL_FROM: ${{ secrets.EMAIL_FROM }}
  51. EMAIL_PASSWORD: ${{ secrets.EMAIL_PASSWORD }}
  52. EMAIL_TO: ${{ secrets.EMAIL_TO }}
  53. EMAIL_SMTP_SERVER: ${{ secrets.EMAIL_SMTP_SERVER }}
  54. EMAIL_SMTP_PORT: ${{ secrets.EMAIL_SMTP_PORT }}
  55. NTFY_TOPIC: ${{ secrets.NTFY_TOPIC }}
  56. NTFY_SERVER_URL: ${{ secrets.NTFY_SERVER_URL }}
  57. NTFY_TOKEN: ${{ secrets.NTFY_TOKEN }}
  58. BARK_URL: ${{ secrets.BARK_URL }}
  59. SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
  60. GITHUB_ACTIONS: true
  61. run: python main.py
  62. - name: Commit and push if changes
  63. run: |
  64. git config --global user.name 'GitHub Actions'
  65. git config --global user.email 'actions@github.com'
  66. echo "🔄 Syncing with remote..."
  67. git fetch origin main
  68. # 保存当前更改
  69. git stash --include-untracked || echo "Nothing to stash"
  70. # 同步到远程最新
  71. git reset --hard origin/main
  72. # 恢复本次更改
  73. git stash pop || echo "Nothing to pop"
  74. git add -A
  75. if git diff --quiet && git diff --staged --quiet; then
  76. echo "📭 No changes to commit"
  77. exit 0
  78. fi
  79. echo "📝 Committing changes..."
  80. TIMESTAMP=$(TZ=Asia/Shanghai date "+%Y-%m-%d %H:%M:%S")
  81. git commit -m "Auto update by GitHub Actions at $TIMESTAMP"
  82. echo "⬆️ Pushing changes with retry..."
  83. for i in {1..5}; do
  84. git pull --rebase origin main && git push origin main && {
  85. echo "✅ Successfully pushed on attempt $i"
  86. exit 0
  87. }
  88. echo "⚠️ Attempt $i/$i failed, waiting $((i*3)) seconds..."
  89. sleep $((i * 3))
  90. done
  91. echo "❌ Failed to push after 5 attempts"
  92. exit 1