# crawler.yml
  # Scheduled crawler workflow: checks the 7-day trial window, installs the
  # project with uv, and runs the `trendradar` module with notification,
  # AI, and storage settings supplied from repository secrets.
  name: Get Hot News

  on:
    schedule:
      # ------------------------------------------------------------------
      # Trial Mode
      # ------------------------------------------------------------------
      #
      # How it works:
      #   - Each cycle is 7 days, then the workflow auto-stops.
      #   - Running "Check In" resets the cycle (restarts the 7-day
      #     countdown; it is not cumulative).
      #
      # Why this design:
      #   If you forget to check in for 7 days, maybe you don't really
      #   need this feed. A timely pause helps you detach from the stream
      #   and gives your mind some space.
      #
      # Respect shared resources:
      #   GitHub Actions is a shared public resource provided by the
      #   platform, and every run consumes compute. Check-in ensures
      #   resources go to those who truly need them. Thank you.
      #
      # For long-term use, deploy the Docker version.
      #
      # ------------------------------------------------------------------
      #
      # Changing the run time: only modify the first number (0-59), which
      # is the minute of each hour at which the workflow runs.
      #
      # Examples:
      #   "15 * * * *"    -> minute 15 of every hour
      #   "30 0-14 * * *" -> minute 30 of hours 00-14 UTC
      #                      (roughly 8am-10pm Beijing time)
      #
      - cron: "33 * * * *"
    workflow_dispatch:

  concurrency:
    # A newer run on the same ref cancels the one still in progress.
    group: crawler-${{ github.ref_name }}
    cancel-in-progress: true

  permissions:
    contents: read
    # The expiration step calls `gh workflow disable`, hence write access.
    actions: write

  jobs:
    crawl:
      runs-on: ubuntu-latest
      timeout-minutes: 15
      steps:
        - name: Checkout repository
          uses: actions/checkout@v6
          with:
            fetch-depth: 1
            clean: true

        # Fails the job and disables this workflow once the oldest recorded
        # run of crawler.yml is more than 7 days old.
        - name: Check Expiration
          env:
            GH_TOKEN: ${{ github.token }}
          run: |
            WORKFLOW_FILE="crawler.yml"
            # Read the repository from the runner environment instead of
            # splicing an expression into the script text.
            API_URL="repos/${GITHUB_REPOSITORY}/actions/workflows/${WORKFLOW_FILE}/runs"

            TOTAL=$(gh api "$API_URL" --jq '.total_count')
            if [ -z "$TOTAL" ] || [ "$TOTAL" -eq 0 ]; then
              echo "No previous runs found, skipping expiration check"
              exit 0
            fi

            # The last entry of the last 100-item page is treated as the
            # oldest run (assumes the API lists newest runs first).
            LAST_PAGE=$(( (TOTAL + 99) / 100 ))
            FIRST_RUN_DATE=$(gh api "${API_URL}?per_page=100&page=${LAST_PAGE}" --jq '.workflow_runs[-1].created_at')

            # Skip the check when no usable date came back, rather than
            # letting `date -d` fail on an empty or "null" value.
            if [ -n "$FIRST_RUN_DATE" ] && [ "$FIRST_RUN_DATE" != "null" ]; then
              CURRENT_TIMESTAMP=$(date +%s)
              FIRST_RUN_TIMESTAMP=$(date -d "$FIRST_RUN_DATE" +%s)
              DIFF_SECONDS=$(( CURRENT_TIMESTAMP - FIRST_RUN_TIMESTAMP ))
              LIMIT_SECONDS=604800  # 7 days

              if [ "$DIFF_SECONDS" -gt "$LIMIT_SECONDS" ]; then
                echo "⚠️ 试用期已结束,请运行 'Check In' 签到续期"
                echo "⚠️ Trial expired. Run 'Check In' to renew."
                gh workflow disable "$WORKFLOW_FILE"
                exit 1
              else
                # Round up so a fresh cycle reports 7 days and the final
                # partial day reports 1 instead of 0.
                DAYS_LEFT=$(( (LIMIT_SECONDS - DIFF_SECONDS + 86399) / 86400 ))
                echo "✅ 试用期剩余 ${DAYS_LEFT} 天,到期前请运行 'Check In' 签到续期"
                echo "✅ Trial: ${DAYS_LEFT} days left. Run 'Check In' before expiry to renew."
              fi
            fi

        # ------------------------------------------------------------------
        # TRAFFIC CONTROL (currently disabled)
        # ------------------------------------------------------------------
        # Generates a random delay between 1 and 300 seconds (5 minutes).
        # Critical for load balancing.
        #
        # - name: Random Delay (Traffic Control)
        #   if: success()
        #   run: |
        #     echo "🎲 Traffic Control: Generating random delay..."
        #     DELAY=$(( ( RANDOM % 300 ) + 1 ))
        #     echo "⏸️ Sleeping for ${DELAY} seconds to spread the load..."
        #     sleep ${DELAY}s
        #     echo "▶️ Delay finished. Starting crawler..."

        - name: Set up Python
          if: success()
          uses: actions/setup-python@v6
          with:
            python-version: "3.12"

        - name: Install uv
          if: success()
          uses: astral-sh/setup-uv@v7

        - name: Install dependencies
          if: success()
          run: uv sync --frozen --no-dev

        - name: Verify required files
          if: success()
          run: |
            if [ ! -f config/config.yaml ]; then
              echo "Error: Config missing"
              exit 1
            fi

        - name: Run crawler
          if: success()
          env:
            FEISHU_WEBHOOK_URL: ${{ secrets.FEISHU_WEBHOOK_URL }}
            TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
            TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
            DINGTALK_WEBHOOK_URL: ${{ secrets.DINGTALK_WEBHOOK_URL }}
            WEWORK_WEBHOOK_URL: ${{ secrets.WEWORK_WEBHOOK_URL }}
            WEWORK_MSG_TYPE: ${{ secrets.WEWORK_MSG_TYPE }}
            EMAIL_FROM: ${{ secrets.EMAIL_FROM }}
            EMAIL_PASSWORD: ${{ secrets.EMAIL_PASSWORD }}
            EMAIL_TO: ${{ secrets.EMAIL_TO }}
            EMAIL_SMTP_SERVER: ${{ secrets.EMAIL_SMTP_SERVER }}
            EMAIL_SMTP_PORT: ${{ secrets.EMAIL_SMTP_PORT }}
            NTFY_TOPIC: ${{ secrets.NTFY_TOPIC }}
            NTFY_SERVER_URL: ${{ secrets.NTFY_SERVER_URL }}
            NTFY_TOKEN: ${{ secrets.NTFY_TOKEN }}
            BARK_URL: ${{ secrets.BARK_URL }}
            SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
            # Generic webhook configuration
            GENERIC_WEBHOOK_URL: ${{ secrets.GENERIC_WEBHOOK_URL }}
            GENERIC_WEBHOOK_TEMPLATE: ${{ secrets.GENERIC_WEBHOOK_TEMPLATE }}
            # AI configuration (ai_analysis and ai_translation share the
            # model settings)
            AI_ANALYSIS_ENABLED: ${{ secrets.AI_ANALYSIS_ENABLED }}
            AI_API_KEY: ${{ secrets.AI_API_KEY }}
            AI_MODEL: ${{ secrets.AI_MODEL }}
            AI_API_BASE: ${{ secrets.AI_API_BASE }}
            # Remote storage configuration
            S3_BUCKET_NAME: ${{ secrets.S3_BUCKET_NAME }}
            S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
            S3_SECRET_ACCESS_KEY: ${{ secrets.S3_SECRET_ACCESS_KEY }}
            S3_ENDPOINT_URL: ${{ secrets.S3_ENDPOINT_URL }}
            S3_REGION: ${{ secrets.S3_REGION }}
            # Quoted so the value is unambiguously the string "true".
            GITHUB_ACTIONS: "true"
          # Same flags as the install step, so uv neither re-locks nor
          # installs dev dependencies at run time.
          run: uv run --frozen --no-dev python -m trendradar