# crawler.yml (3.6 KB)
  # Workflow identity, triggers, run-level concurrency and token permissions.
  name: Hot News Crawler

  on:
    # Fires at minute 0 of every hour.
    schedule:
      - cron: '0 * * * *'
    # Also startable by hand.
    workflow_dispatch:

  # At most one run per ref: a newly started run cancels the one in progress.
  concurrency:
    group: crawler-${{ github.ref_name }}
    cancel-in-progress: true

  # Write access to repository contents; the job's final step pushes a commit.
  permissions:
    contents: write
  jobs:
    # Single job: check out the repo, run the crawler, then commit and push
    # whatever files the run left behind.
    crawl:
      runs-on: ubuntu-latest
      timeout-minutes: 30
      steps:
        - name: Checkout repository
          uses: actions/checkout@v4
          with:
            # Fetch complete history rather than a shallow clone; the commit
            # step below fetches, resets and rebases against the remote branch.
            fetch-depth: 0
            clean: true

        - name: Set up Python
          uses: actions/setup-python@v5
          with:
            # Quoted on purpose: unquoted 3.10 would be read as the float 3.1.
            python-version: "3.10"

        - name: Install dependencies
          run: |
            python -m pip install --upgrade pip
            pip install -r requirements.txt

        # Fail fast with a clear message when a required config file is absent.
        - name: Verify required files
          run: |
            echo "🔍 检查必需的配置文件..."
            for required in config/config.yaml config/frequency_words.txt; do
              if [ ! -f "$required" ]; then
                echo "❌ 错误: $required 文件不存在"
                exit 1
              fi
            done
            echo "✅ 配置文件检查通过"

        - name: Run crawler
          env:
            # Notification-channel settings taken from repository secrets;
            # presumably read by main.py from the environment (main.py is not
            # part of this file).
            FEISHU_WEBHOOK_URL: ${{ secrets.FEISHU_WEBHOOK_URL }}
            TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
            TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
            DINGTALK_WEBHOOK_URL: ${{ secrets.DINGTALK_WEBHOOK_URL }}
            WEWORK_WEBHOOK_URL: ${{ secrets.WEWORK_WEBHOOK_URL }}
            WEWORK_MSG_TYPE: ${{ secrets.WEWORK_MSG_TYPE }}
            EMAIL_FROM: ${{ secrets.EMAIL_FROM }}
            EMAIL_PASSWORD: ${{ secrets.EMAIL_PASSWORD }}
            EMAIL_TO: ${{ secrets.EMAIL_TO }}
            EMAIL_SMTP_SERVER: ${{ secrets.EMAIL_SMTP_SERVER }}
            EMAIL_SMTP_PORT: ${{ secrets.EMAIL_SMTP_PORT }}
            NTFY_TOPIC: ${{ secrets.NTFY_TOPIC }}
            NTFY_SERVER_URL: ${{ secrets.NTFY_SERVER_URL }}
            NTFY_TOKEN: ${{ secrets.NTFY_TOKEN }}
            BARK_URL: ${{ secrets.BARK_URL }}
            SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
            # Environment values are strings; quote so YAML does not type it
            # as a boolean.
            GITHUB_ACTIONS: "true"
          run: python main.py

        # Rebase this run's output onto the newest remote state and push it,
        # retrying a few times in case the branch moves underneath us.
        - name: Commit and push if changes
          env:
            BRANCH_NAME: ${{ github.event.repository.default_branch }}
          run: |
            git config --global user.name 'GitHub Actions'
            git config --global user.email 'actions@github.com'

            echo "🔄 Syncing with remote (branch: $BRANCH_NAME)..."
            git fetch origin "$BRANCH_NAME"

            # Set the current changes aside. `git stash` exits 0 even when
            # there is nothing to save, so compare the stash ref before and
            # after to learn whether an entry was really created.
            stash_before=$(git rev-parse -q --verify refs/stash || true)
            git stash push --include-untracked
            stash_after=$(git rev-parse -q --verify refs/stash || true)

            # Move to the latest remote state.
            git reset --hard "origin/$BRANCH_NAME"

            # Restore this run's changes. A failed pop (e.g. a conflict) must
            # stop the step: otherwise `git add -A` below would commit files
            # containing conflict markers.
            if [ "$stash_before" != "$stash_after" ]; then
              if ! git stash pop; then
                echo "❌ Could not re-apply this run's changes onto origin/$BRANCH_NAME"
                exit 1
              fi
            else
              echo "Nothing to stash"
            fi

            git add -A
            # Everything is staged at this point, so the index is the only
            # place a difference can show up.
            if git diff --staged --quiet; then
              echo "📭 No changes to commit"
              exit 0
            fi

            echo "📝 Committing changes..."
            TIMESTAMP=$(TZ=Asia/Shanghai date "+%Y-%m-%d %H:%M:%S")
            git commit -m "Auto update by GitHub Actions at $TIMESTAMP"

            echo "⬆️ Pushing changes with retry..."
            for attempt in {1..5}; do
              if git pull --rebase origin "$BRANCH_NAME" && git push origin "$BRANCH_NAME"; then
                echo "✅ Successfully pushed on attempt $attempt"
                exit 0
              fi
              # A rebase that stopped part-way leaves the repo mid-rebase;
              # abort it so the next attempt starts from a clean state.
              git rebase --abort 2>/dev/null || true
              echo "⚠️ Attempt $attempt failed, waiting $((attempt * 3)) seconds..."
              sleep $((attempt * 3))
            done

            echo "❌ Failed to push after 5 attempts"
            exit 1