7 tháng trước cách đây · c7bacdfff7
--- a/.github/ISSUE_TEMPLATE/01-bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/01-bug-report.yml
@@ -4,8 +4,6 @@ name: 🐛 遇到问题了
 
				 description: 程序运行不正常或出现错误
			
 
				 title: "[问题] "
			
 
				 labels: ["bug"]
			
 
				-assignees:
			
 
				-  - sansan0
			
 
				 body:
			
 
				   - type: markdown
			
 
				     attributes:
			
--- a/.github/ISSUE_TEMPLATE/02-feature-request.yml
+++ b/.github/ISSUE_TEMPLATE/02-feature-request.yml
@@ -4,8 +4,6 @@ name: 💡 我有个想法
 
				 description: 建议新功能或改进现有功能
			
 
				 title: "[建议] "
			
 
				 labels: ["enhancement"]
			
 
				-assignees:
			
 
				-  - sansan0
			
 
				 body:
			
 
				   - type: markdown
			
 
				     attributes:
			
--- a/.github/ISSUE_TEMPLATE/03-config-help.yml
+++ b/.github/ISSUE_TEMPLATE/03-config-help.yml
@@ -4,8 +4,6 @@ name: ⚙️ 设置遇到困难
 
				 description: 配置相关的问题或需要帮助
			
 
				 title: "[设置] "
			
 
				 labels: ["配置", "帮助"]
			
 
				-assignees:
			
 
				-  - sansan0
			
 
				 body:
			
 
				   - type: markdown
			
 
				     attributes:
			
--- a/.github/workflows/clean-crawler.yml
+++ b/.github/workflows/clean-crawler.yml
@@ -0,0 +1,28 @@
 
				+name: Check In
			
 
				+
			
 
				+# Check-in / renewal: running this workflow clears the repository's workflow
			
 
				+# run history, which resets the 7-day timer and keeps "Get Hot News" active.
			
 
				+#
			
 
				+# How to use:
			
 
				+#   1. Click the "Run workflow" button.
			
 
				+#   2. Run it at least once every 7 days.
			
 
				+
			
 
				+on:
			
 
				+  workflow_dispatch:
			
 
				+
			
 
				+jobs:
			
 
				+  del_runs:
			
 
				+    runs-on: ubuntu-latest
			
 
				+    permissions:
			
 
				+      contents: read
			
 
				+      actions: write
			
 
				+    steps:
			
 
				+      - name: Delete all workflow runs
			
 
				+        uses: Mattraks/delete-workflow-runs@v2
			
 
				+        with:
			
 
				+          repository: ${{ github.repository }}
			
 
				+          token: ${{ github.token }}
			
 
				+          keep_minimum_runs: 0
			
 
				+          retain_days: 0
			
 
				+          delete_run_by_conclusion_pattern: 'ALL'
			
 
				+          delete_workflow_by_state_pattern: 'ALL'
			
--- a/.github/workflows/crawler.yml
+++ b/.github/workflows/crawler.yml
@@ -0,0 +1,163 @@
 
				+name: Get Hot News
			
 
				+
			
 
				+on:
			
 
				+  schedule:
			
 
				+    # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
			
 
				+    # ⚠️ Trial Mode
			
 
				+    # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
			
 
				+    #
			
 
				+    # 🔄 How it works:
			
 
				+    #    - Each cycle lasts 7 days; once it elapses the workflow stops
			
 
				+    #      automatically (the "Check Expiration" step below disables it).
			
 
				+    #    - Running "Check In" resets the cycle: the 7-day countdown restarts
			
 
				+    #      from zero, it is not added to the time remaining.
			
 
				+    #
			
 
				+    # 💡 Why this design:
			
 
				+    #    If you forget to check in for 7 days, you probably don't really
			
 
				+    #    need this news feed.
			
 
				+    #    A timely pause helps you detach from the information stream and
			
 
				+    #    gives your mind some breathing room.
			
 
				+    #
			
 
				+    # 🙏 Respect shared resources:
			
 
				+    #    GitHub Actions is a shared public resource provided by the platform,
			
 
				+    #    and every run consumes compute.
			
 
				+    #    The check-in mechanism makes sure resources go to the users who truly
			
 
				+    #    need them. Thank you for your understanding and cooperation.
			
 
				+    #
			
 
				+    # 🚀 For long-term use, deploy the Docker version.
			
 
				+    #
			
 
				+    # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
			
 
				+    #
			
 
				+    # 📝 Changing the run time: edit only the first number (0-59), which is
			
 
				+    #    the minute of each hour at which the workflow runs.
			
 
				+    #
			
 
				+    # Examples:
			
 
				+    #   "15 * * * *"     → minute 15 of every hour
			
 
				+    #   "30 0-14 * * *"  → minute 30 of each hour, 00:30-14:30 UTC (Beijing 08:30-22:30)
			
 
				+    #
			
 
				+    - cron: "33 * * * *"  # minute 33 of every hour
			
 
				+
			
 
				+  workflow_dispatch:
			
 
				+
			
 
				+concurrency:
			
 
				+  group: crawler-${{ github.ref_name }}
			
 
				+  cancel-in-progress: true  # a newer run cancels one still in progress in the same group
			
 
				+
			
 
				+permissions:
			
 
				+  contents: read
			
 
				+  actions: write  # the "Check Expiration" step calls `gh workflow disable` with this token
			
 
				+
			
 
				+jobs:
			
 
				+  crawl:
			
 
				+    runs-on: ubuntu-latest
			
 
				+    timeout-minutes: 15  # cap for the whole job, which includes the random delay of up to 300 s
			
 
				+
			
 
				+    steps:
			
 
				+      - name: Checkout repository
			
 
				+        uses: actions/checkout@v4
			
 
				+        with:
			
 
				+          fetch-depth: 1  # shallow clone: only the most recent commit is fetched
			
 
				+          clean: true
			
 
				+
			
 
				+      - name: Check Expiration
			
 
				+        env:
			
 
				+          GH_TOKEN: ${{ github.token }}
			
 
				+        run: |
			
 
				+          # Trial guard: the trial lasts 7 days, counted from the oldest recorded
			
 
				+          # run of this workflow. Once that time has elapsed the workflow
			
 
				+          # disables itself and this run fails.
			
 
				+          WORKFLOW_FILE="crawler.yml"
			
 
				+          # GITHUB_REPOSITORY is read by the shell at run time instead of being
			
 
				+          # templated into the script text by an inline workflow expression.
			
 
				+          API_URL="repos/${GITHUB_REPOSITORY}/actions/workflows/${WORKFLOW_FILE}/runs"
			
 
				+          LIMIT_SECONDS=604800  # 7 days
			
 
				+
			
 
				+          TOTAL=$(gh api "$API_URL" --jq '.total_count')
			
 
				+          if [ -z "$TOTAL" ] || [ "$TOTAL" -eq 0 ]; then
			
 
				+            echo "No previous runs found, skipping expiration check"
			
 
				+            exit 0
			
 
				+          fi
			
 
				+
			
 
				+          # With 100 runs per page, the final entry of the last page is taken as
			
 
				+          # the oldest run (this relies on the API listing runs newest first).
			
 
				+          LAST_PAGE=$(( (TOTAL + 99) / 100 ))
			
 
				+          # "// empty" prints nothing when that entry is missing, e.g. when the
			
 
				+          # run history was cleared between the two API calls.
			
 
				+          FIRST_RUN_DATE=$(gh api "$API_URL?per_page=100&page=$LAST_PAGE" --jq '.workflow_runs[-1].created_at // empty')
			
 
				+
			
 
				+          if [ -z "$FIRST_RUN_DATE" ]; then
			
 
				+            echo "Could not determine the first run date, skipping expiration check"
			
 
				+            exit 0
			
 
				+          fi
			
 
				+
			
 
				+          CURRENT_TIMESTAMP=$(date +%s)
			
 
				+          FIRST_RUN_TIMESTAMP=$(date -d "$FIRST_RUN_DATE" +%s)
			
 
				+          DIFF_SECONDS=$((CURRENT_TIMESTAMP - FIRST_RUN_TIMESTAMP))
			
 
				+
			
 
				+          if [ "$DIFF_SECONDS" -gt "$LIMIT_SECONDS" ]; then
			
 
				+            echo "⚠️ 试用期已结束，请运行 'Check In' 签到续期"
			
 
				+            echo "⚠️ Trial expired. Run 'Check In' to renew."
			
 
				+            # Disable the workflow so the schedule stops firing, then fail this run.
			
 
				+            gh workflow disable "$WORKFLOW_FILE"
			
 
				+            exit 1
			
 
				+          fi
			
 
				+
			
 
				+          # Whole days remaining, rounded down (shows 0 during the final day).
			
 
				+          DAYS_LEFT=$(( (LIMIT_SECONDS - DIFF_SECONDS) / 86400 ))
			
 
				+          echo "✅ 试用期剩余 ${DAYS_LEFT} 天，到期前请运行 'Check In' 签到续期"
			
 
				+          echo "✅ Trial: ${DAYS_LEFT} days left. Run 'Check In' before expiry to renew."
			
 
				+
			
 
				+
			
 
				+      # --------------------------------------------------------------------------------
			
 
				+      # 🚦 TRAFFIC CONTROL
			
 
				+      # --------------------------------------------------------------------------------
			
 
				+      # Sleeps for a random 1 to 300 seconds (up to 5 minutes) before crawling.
			
 
				+      # RANDOM % 300 yields 0-299 and the +1 shifts that to 1-300.
			
 
				+      #
			
 
				+      # Critical for load balancing: the delay spreads the load over time,
			
 
				+      # presumably so that many scheduled runs do not start crawling at once.
			
 
				+      - name: Random Delay (Traffic Control)
			
 
				+        if: success()
			
 
				+        run: |
			
 
				+          echo "🎲 Traffic Control: Generating random delay..."
			
 
				+          DELAY=$(( ( RANDOM % 300 )  + 1 ))
			
 
				+          echo "⏸️  Sleeping for ${DELAY} seconds to spread the load..."
			
 
				+          sleep ${DELAY}s
			
 
				+          echo "▶️  Delay finished. Starting crawler..."
			
 
				+
			
 
				+      - name: Set up Python
			
 
				+        if: success()  # run only when every earlier step succeeded
			
 
				+        uses: actions/setup-python@v5
			
 
				+        with:
			
 
				+          python-version: "3.10"  # quoted so YAML keeps it a string instead of the float 3.1
			
 
				+          cache: "pip"  # let setup-python cache pip downloads between runs
			
 
				+
			
 
				+      - name: Install dependencies
			
 
				+        if: success()
			
 
				+        run: |
			
 
				+          python -m pip install --upgrade pip
			
 
				+          pip install -r requirements.txt
			
 
				+
			
 
				+      - name: Verify required files  # fails the job early when config/config.yaml is absent
			
 
				+        if: success()
			
 
				+        run: |
			
 
				+          if [ ! -f config/config.yaml ]; then
			
 
				+            echo "Error: Config missing"
			
 
				+            exit 1
			
 
				+          fi
			
 
				+
			
 
				+      - name: Run crawler
			
 
				+        if: success()
			
 
				+        env:  # notification-channel and storage settings, mostly injected from repository secrets
			
 
				+          FEISHU_WEBHOOK_URL: ${{ secrets.FEISHU_WEBHOOK_URL }}
			
 
				+          TELEGRAM_BOT_TOKEN: ${{ secrets.TELEGRAM_BOT_TOKEN }}
			
 
				+          TELEGRAM_CHAT_ID: ${{ secrets.TELEGRAM_CHAT_ID }}
			
 
				+          DINGTALK_WEBHOOK_URL: ${{ secrets.DINGTALK_WEBHOOK_URL }}
			
 
				+          WEWORK_WEBHOOK_URL: ${{ secrets.WEWORK_WEBHOOK_URL }}
			
 
				+          WEWORK_MSG_TYPE: ${{ secrets.WEWORK_MSG_TYPE }}
			
 
				+          EMAIL_FROM: ${{ secrets.EMAIL_FROM }}
			
 
				+          EMAIL_PASSWORD: ${{ secrets.EMAIL_PASSWORD }}
			
 
				+          EMAIL_TO: ${{ secrets.EMAIL_TO }}
			
 
				+          EMAIL_SMTP_SERVER: ${{ secrets.EMAIL_SMTP_SERVER }}
			
 
				+          EMAIL_SMTP_PORT: ${{ secrets.EMAIL_SMTP_PORT }}
			
 
				+          NTFY_TOPIC: ${{ secrets.NTFY_TOPIC }}
			
 
				+          NTFY_SERVER_URL: ${{ secrets.NTFY_SERVER_URL }}
			
 
				+          NTFY_TOKEN: ${{ secrets.NTFY_TOKEN }}
			
 
				+          BARK_URL: ${{ secrets.BARK_URL }}
			
 
				+          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
			
 
				+          STORAGE_BACKEND: auto  # hard-coded here rather than read from a secret
			
 
				+          LOCAL_RETENTION_DAYS: ${{ secrets.LOCAL_RETENTION_DAYS }}
			
 
				+          REMOTE_RETENTION_DAYS: ${{ secrets.REMOTE_RETENTION_DAYS }}
			
 
				+          S3_BUCKET_NAME: ${{ secrets.S3_BUCKET_NAME }}
			
 
				+          S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }}
			
 
				+          S3_SECRET_ACCESS_KEY: ${{ secrets.S3_SECRET_ACCESS_KEY }}
			
 
				+          S3_ENDPOINT_URL: ${{ secrets.S3_ENDPOINT_URL }}
			
 
				+          S3_REGION: ${{ secrets.S3_REGION }}
			
 
				+          GITHUB_ACTIONS: true  # NOTE(review): runners already export GITHUB_ACTIONS=true by default — confirm this entry is needed
			
 
				+        run: python -m trendradar  # runs the trendradar package as a module
			
--- a/README-EN.md
+++ b/README-EN.md
@@ -1,6 +1,6 @@
 
				 <div align="center" id="trendradar">
			
 
				 
			
 
				-> **📢 Announcement:** After communicating with GitHub officials, "One-Click Fork Deployment" will be restored after compliance adjustments are completed. Please stay tuned for **v4.0.0** update
			
 
				+> **📢 Announcement:** **v4.0.0** has been released! It includes storage architecture refactoring, database optimization, modularization improvements, and other major updates
			
 
				 
			
 
				 <a href="https://github.com/sansan0/TrendRadar" title="TrendRadar">
			
 
				   <img src="/_image/banner.webp" alt="TrendRadar Banner" width="80%">
			
@@ -16,8 +16,8 @@
 
				 [![GitHub Stars](https://img.shields.io/github/stars/sansan0/TrendRadar?style=flat-square&logo=github&color=yellow)](https://github.com/sansan0/TrendRadar/stargazers)
			
 
				 [![GitHub Forks](https://img.shields.io/github/forks/sansan0/TrendRadar?style=flat-square&logo=github&color=blue)](https://github.com/sansan0/TrendRadar/network/members)
			
 
				 [![License](https://img.shields.io/badge/license-GPL--3.0-blue.svg?style=flat-square)](LICENSE)
			
 
				-[![Version](https://img.shields.io/badge/version-v3.5.0-blue.svg)](https://github.com/sansan0/TrendRadar)
			
 
				-[![MCP](https://img.shields.io/badge/MCP-v1.0.3-green.svg)](https://github.com/sansan0/TrendRadar)
			
 
				+[![Version](https://img.shields.io/badge/version-v4.0.0-blue.svg)](https://github.com/sansan0/TrendRadar)
			
 
				+[![MCP](https://img.shields.io/badge/MCP-v1.1.0-green.svg)](https://github.com/sansan0/TrendRadar)
			
 
				 
			
 
				 [![WeWork](https://img.shields.io/badge/WeWork-Notification-00D4AA?style=flat-square)](https://work.weixin.qq.com/)
			
 
				 [![WeChat](https://img.shields.io/badge/WeChat-Notification-00D4AA?style=flat-square)](https://weixin.qq.com/)
			
@@ -48,62 +48,61 @@
 
				 <br>
			
 
				 
			
 
				 <details>
			
 
				-<summary>🚨 <strong>【MUST READ】Important Announcement: The Correct Way to Deploy This Project</strong></summary>
			
 
				+<summary>🚨 <strong>【Must Read】Important Announcement: v4.0.0 Deployment & Storage Architecture Changes</strong></summary>
			
 
				 
			
 
				 <br>
			
 
				 
			
 
				-> **⚠️ December 2025 Urgent Notice**
			
 
				->
			
 
				-> Due to a surge in Fork numbers causing excessive load on GitHub servers, **GitHub Actions and GitHub Pages deployments are currently restricted**. Please read the following instructions carefully to ensure successful deployment.
			
 
				+### 🛠️ Choose the Deployment Method That Fits You
			
 
				+
			
 
				+#### 🅰️ Option 1: Docker Deployment (Recommended 🔥)
			
 
				 
			
 
				-### 1. ✅ Only Recommended Deployment Method: Docker
			
 
				+* **Features**: Most stable and simplest. Data is stored in **local SQLite**, fully under your control.
			
 
				 
			
 
				-**This is currently the most stable solution, free from GitHub restrictions.** Data is stored locally and won't be affected by GitHub policy changes.
			
 
				+* **Best for**: Users with their own server, NAS, or an always-on PC.
			
 
				 
			
 
				 * 👉 [Jump to Docker Deployment Tutorial](#6-docker-deployment)
			
 
				 
			
 
				 ---
			
 
				 
			
 
				-### 2. If You Were Planning to Fork This Project...
			
 
				+#### 🅱️ Option 2: GitHub Actions Deployment (Restored ✅)
			
 
				 
			
 
				-To reduce pressure on GitHub servers, **please DO NOT directly click the "Fork" button!**
			
 
				+* **Features**: Data is no longer committed directly to the repo. Instead, it is stored in **Remote Cloud Storage** (supports S3-compatible protocols: Cloudflare R2, Alibaba Cloud OSS, Tencent Cloud COS, etc.).
			
 
				 
			
 
				-Please use the **"Use this template"** feature instead of Fork:
			
 
				+* **Requirement**: You **must** configure an S3-compatible object storage service (Cloudflare R2 recommended, it's free).
			
 
				 
			
 
				-1.  **Click** the green **[Use this template]** button in the top right corner of the original repository page.
			
 
				-2.  **Select** "Create a new repository".
			
 
				+> **⚠️ Note**: If you choose this option, you must complete the following two configuration steps:
			
 
				 
			
 
				-**Why do this?**
			
 
				-* **❌ Fork**: Copies complete history records. Many forks running simultaneously will trigger GitHub risk control.
			
 
				-* **✅ Use this template**: Creates a completely new independent repository without historical baggage, more server-friendly.
			
 
				+#### 1. 🚀 Recommended Start: Use this template
			
 
				 
			
 
				----
			
 
				+To keep the repository clean and avoid inheriting redundant history, I **recommend** using Template mode:
			
 
				 
			
 
				-### 3. About New Data Storage
			
 
				+1.  **Click** the green **[Use this template]** button at the top right of the original repository page.
			
 
				 
			
 
				-The new version will use **Cloudflare R2** to store news data, ensuring data persistence.
			
 
				+2.  **Select** "Create a new repository".
			
 
				 
			
 
				-**⚠️ Configuration Prerequisites:**
			
 
				+> **💡 Why do this?**
			
 
				+> * **Use this template**: Creates a brand new, clean repository with no historical baggage.
			
 
				+> * **Fork**: Retains the complete commit history and relationships, consuming more GitHub resources.
			
 
				 
			
 
				-According to Cloudflare platform rules, activating R2 requires binding a payment method.
			
 
				+#### 2. ☁️ About the Mandatory Remote Storage for GitHub Actions
			
 
				 
			
 
				-- **Purpose:** Identity verification only (Verify Only), no charges will be incurred.
			
 
				-- **Payment:** Supports credit cards or PayPal (China region).
			
 
				-- **Usage:** R2's free tier is sufficient to cover this project's daily operation, no payment required.
			
 
				+If you choose **Option 2 (GitHub Actions)**, you must configure an S3-compatible object storage service.
			
 
				 
			
 
				----
			
 
				+**Supported Storage Services:**
			
 
				+- **Cloudflare R2** (Recommended, generous free tier)
			
 
				+- Other S3-compatible services
			
 
				+
			
 
				+**⚠️ Configuration Prerequisites (Using Cloudflare R2 as Example):**
			
 
				 
			
 
				-### 4. 📅 Future Plans & Documentation Reading Notes
			
 
				+According to Cloudflare platform rules, enabling R2 requires binding a payment method.
			
 
				 
			
 
				-> **Future Plans:**
			
 
				-> - Exploring new approach: keep Actions for fetching and pushing, but no longer save data to repository, use external storage instead.
			
 
				+* **Purpose**: Identity verification only (Verify Only). **No charges will be incurred**.
			
 
				 
			
 
				-**⚠️ Reading Note:**
			
 
				-Given that the above plans mean **Fork deployment mode may return in a new form in the future**, and the workload to fully revise documentation is massive, we have temporarily retained the old descriptions.
			
 
				+* **Payment**: Supports international credit cards or PayPal.
			
 
				 
			
 
				-**At the current stage, if "Fork" related expressions still appear in subsequent tutorials, please ignore them or understand them as "Use this template"**.
			
 
				+* **Usage**: The R2 free tier (10GB storage/month) is sufficient to cover the daily operation of this project. No need to worry about costs.
			
 
				 
			
 
				-👉 **[Click here to view TrendRadar's latest official documentation](https://github.com/sansan0/TrendRadar?tab=readme-ov-file)**
			
 
				+👉 **[Click to View Detailed Configuration Tutorial](#-quick-start)**
			
 
				 
			
 
				 </details>
			
 
				 
			
@@ -287,10 +286,32 @@ Supports **WeWork** (+ WeChat push solution), **Feishu**, **DingTalk**, **Telegr
 
				 - ⚠️ **Paired Configuration**: Telegram and ntfy require paired parameter quantities to match (e.g., token and chat_id both have 2 values)
			
 
				 - ⚠️ **Quantity Limit**: Default maximum 3 accounts per channel, exceeded values will be truncated
			
 
				 
			
 
				-### **Multi-Platform Support**
			
 
				-- **GitHub Pages**: Auto-generate beautiful web reports, PC/mobile adapted
			
 
				+### **Flexible Storage Architecture (v4.0.0 Major Update)**
			
 
				+
			
 
				+**Multi-Backend Support**:
			
 
				+- ☁️ **Remote Cloud Storage**: GitHub Actions environment default, supports S3-compatible protocols (R2/OSS/COS, etc.), data stored in cloud, keeping repository clean
			
 
				+- 💾 **Local SQLite**: Traditional SQLite database, stable and efficient (Docker/local deployment)
			
 
				+- 🔀 **Auto Selection**: Auto-selects appropriate backend based on runtime environment
			
 
				+
			
 
				+**Data Format Hierarchy**:
			
 
				+
			
 
				+| Format | Role | Description |
			
 
				+|--------|------|-------------|
			
 
				+| **SQLite** | Primary storage | Complete data with statistics information |
			
 
				+| **TXT** | Human-readable backup | Optional text records for manual viewing |
			
 
				+| **HTML** | Web report | Beautiful visual report (GitHub Pages) |
			
 
				+
			
 
				+**Data Management Features**:
			
 
				+- Auto data cleanup (configurable retention period)
			
 
				+- Timezone support (configurable IANA time zone)
			
 
				+- Cloud/local seamless switching
			
 
				+
			
 
				+> 💡 For storage configuration details, see [Configuration Details - Storage Configuration](#11-storage-configuration-v400-new)
			
 
				+
			
 
				+### **Multi-Platform Deployment**
			
 
				+- **GitHub Actions**: Cloud automated operations (7-day check-in cycle + remote cloud storage)
			
 
				 - **Docker Deployment**: Supports multi-architecture containerized operation
			
 
				-- **Data Persistence**: HTML/TXT multi-format history saving
			
 
				+- **Local Running**: Python environment direct execution
			
 
				 
			
 
				 
			
 
				 ### **AI Smart Analysis (v3.0.0 New)**
			
@@ -341,10 +362,32 @@ Transform from "algorithm recommendation captivity" to "actively getting the inf
 
				 >**Upgrade Instructions**:
			
 
				 - **📌 Check Latest Updates**: **[Original Repository Changelog](https://github.com/sansan0/TrendRadar?tab=readme-ov-file#-changelog)**
			
 
				 - **Tip**: Do NOT update this project via **Sync fork**. Check [Changelog] to understand specific [Upgrade Methods] and [Features]
			
 
				-- **Minor Version Update**: Upgrading from v2.x to v2.y, replace `main.py` in your forked repo with the latest version
			
 
				 - **Major Version Upgrade**: Upgrading from v1.x to v2.y, recommend deleting existing fork and re-forking to save effort and avoid config conflicts
			
 
				 
			
 
				 
			
 
				+### 2025/12/13 - v4.0.0
			
 
				+
			
 
				+**🎉 Major Update: Comprehensive Refactoring of Storage and Core Architecture**
			
 
				+
			
 
				+- **Multi-Storage Backend Support**: Introduced a brand new storage module supporting local SQLite and remote cloud storage (S3-compatible protocols, Cloudflare R2 recommended for free tier), adaptable to GitHub Actions, Docker, and local environments.
			
 
				+- **Database Structure Optimization**: Refactored SQLite database table structures to improve data efficiency and query performance.
			
 
				+- **Enhanced Features**: Implemented date format standardization, data retention policies, timezone configuration support, and optimized time display. Fixed remote storage data persistence issues to ensure accurate data merging.
			
 
				+- **Cleanup and Compatibility**: Removed most legacy compatibility code and unified data storage and retrieval methods.
			
 
				+
			
 
				+### 2025/12/13 - mcp-v1.1.0
			
 
				+
			
 
				+**MCP Module Update:**
			
 
				+- Adapted for v4.0.0, while maintaining compatibility with v3.x data.
			
 
				+- Added storage sync tools:
			
 
				+  - `sync_from_remote`: Pull data from remote storage to local
			
 
				+  - `get_storage_status`: Get storage configuration and status
			
 
				+  - `list_available_dates`: List available dates in local/remote storage
			
 
				+
			
 
				+
			
 
				+<details>
			
 
				+<summary>👉 Click to expand: <strong>Historical Updates</strong></summary>
			
 
				+
			
 
				+
			
 
				 ### 2025/12/03 - v3.5.0
			
 
				 
			
 
				 **🎉 Core Feature Enhancements**
			
@@ -397,7 +440,7 @@ Transform from "algorithm recommendation captivity" to "actively getting the inf
 
				 
			
 
				 **🔧 Upgrade Instructions**:
			
 
				 - **GitHub Fork Users**: Update `main.py`, `config/config.yaml` (Added multi-account push support, existing single-account configuration unaffected)
			
 
				-- **Docker Users**: Update `.env`, `docker compose.yml` or set environment variables `REVERSE_CONTENT_ORDER`, `MAX_ACCOUNTS_PER_CHANNEL`
			
 
				+- **Docker Users**: Update `.env`, `docker-compose.yml` or set environment variables `REVERSE_CONTENT_ORDER`, `MAX_ACCOUNTS_PER_CHANNEL`
			
 
				 - **Multi-Account Push**: New feature, disabled by default, existing single-account configuration unaffected
			
 
				 
			
 
				 
			
@@ -431,10 +474,6 @@ Transform from "algorithm recommendation captivity" to "actively getting the inf
 
				   - Tool count increased from 13 to 14
			
 
				 
			
 
				 
			
 
				-<details>
			
 
				-<summary>👉 Click to expand: <strong>Historical Updates</strong></summary>
			
 
				-
			
 
				-
			
 
				 ### 2025/11/25 - v3.4.0
			
 
				 
			
 
				 **🎉 Added Slack Push Support**
			
@@ -819,11 +858,44 @@ frequency_words.txt file added **required word** feature, using + sign
 
				 
			
 
				 > **📖 Reminder**: Fork users should first **[check the latest official documentation](https://github.com/sansan0/TrendRadar?tab=readme-ov-file)** to ensure the configuration steps are up to date.
			
 
				 
			
 
				+### ⚠️ GitHub Actions Usage Instructions
			
 
				+
			
 
				+**v4.0.0 Important Change**: Introduced "Activity Detection" mechanism—GitHub Actions now requires periodic check-in to maintain operation.
			
 
				+
			
 
				+#### 🔄 Check-In Renewal Mechanism
			
 
				+
			
 
				+- **Running Cycle**: Valid for **7 days**—service will automatically suspend when countdown ends.
			
 
				+- **Renewal Method**: Manually trigger the "Check In" workflow on the Actions page to reset the 7-day validity period.
			
 
				+- **Operation Path**: `Actions` → `Check In` → `Run workflow`
			
 
				+- **Design Philosophy**:
			
 
				+    - If you forget for 7 days, maybe you don't really need it. Letting it stop is a form of digital detox, freeing you from the constant stream of information.
			
 
				+    - GitHub Actions is a valuable public computing resource. The check-in mechanism aims to prevent wasted computing cycles, ensuring resources are allocated to truly active users who need them. Thank you for your understanding and support.
			
 
				+
			
 
				+#### 📦 Data Storage (Required Configuration)
			
 
				+
			
 
				+In GitHub Actions environment, data is stored in **Remote Cloud Storage** (supports S3-compatible protocols, Cloudflare R2 recommended for free tier), keeping your repository clean (see **Required Configuration: Remote Cloud Storage** below).
			
 
				+
			
 
				+#### 🚀 Recommended: Docker Deployment
			
 
				+
			
 
				+For long-term stable operation, we recommend [Docker Deployment](#6-docker-deployment), with data stored locally and no check-in required—though it does require a machine that stays online (your own server, a NAS, or an always-on PC).
			
 
				+
			
 
				+<br>
			
 
				+
			
 
				+> 🎉 **Now Supported: Multi-Cloud Storage Options**
			
 
				+>
			
 
				+> This project now supports S3-compatible protocols. You can choose:
			
 
				+> - **Cloudflare R2** (Recommended, generous free tier)
			
 
				+> - Other S3-compatible storage services
			
 
				+>
			
 
				+> Simply configure the corresponding `S3_ENDPOINT_URL`, `S3_BUCKET_NAME` and other environment variables to switch.
			
 
				+
			
 
				+---
			
 
				+
			
 
				 1. **Fork this project** to your GitHub account
			
 
				 
			
 
				    - Click the "Fork" button at the top right of this page
			
 
				 
			
 
				-2. **Setup GitHub Secrets (Choose your needed platforms)**:
			
 
				+2. **Setup GitHub Secrets (Required + Optional Platforms)**:
			
 
				 
			
 
				    In your forked repo, go to `Settings` > `Secrets and variables` > `Actions` > `New repository secret`
			
 
				 
			
@@ -862,6 +934,35 @@ frequency_words.txt file added **required word** feature, using + sign
 
				 
			
 
				    <br>
			
 
				 
			
 
				+   <details>
			
 
				+   <summary>⚠️ <strong>Required Configuration: Remote Cloud Storage</strong> (Required for GitHub Actions Environment, Cloudflare R2 Recommended)</summary>
			
 
				+   <br>
			
 
				+
			
 
				+   **GitHub Secret Configuration (⚠️ All 4 configuration items below are required):**
			
 
				+
			
 
				+   | Name | Secret (Value) Description |
			
 
				+   |------|----------------------------|
			
 
				+   | `S3_BUCKET_NAME` | Bucket name (e.g., `trendradar-data`) |
			
 
				+   | `S3_ACCESS_KEY_ID` | Access key ID |
			
 
				+   | `S3_SECRET_ACCESS_KEY` | Secret access key |
			
 
				+   | `S3_ENDPOINT_URL` | S3 API endpoint (e.g., R2: `https://<account-id>.r2.cloudflarestorage.com`) |
			
 
				+
			
 
				+   <br>
			
 
				+
			
 
				+   **How to Get Credentials (Using Cloudflare R2 as Example):**
			
 
				+
			
 
				+   1. Visit [Cloudflare Dashboard](https://dash.cloudflare.com/) and log in
			
 
				+   2. Select `R2` in left menu → Click `Create Bucket` → Enter name (e.g., `trendradar-data`)
			
 
				+   3. Click `Manage R2 API Tokens` at top right → `Create API Token`
			
 
				+   4. Select `Object Read & Write` permission → After creation, it will display `Access Key ID` and `Secret Access Key`
			
 
				+   5. Endpoint URL can be found in bucket details page (format: `https://<account-id>.r2.cloudflarestorage.com`)
			
 
				+
			
 
				+   **Notes**:
			
 
				+   - R2 free tier: 10GB storage + 1 million reads per month, sufficient for this project
			
 
				+   - Activation requires binding a payment method (identity verification only, no charges)
			
 
				+   - Data stored in cloud, keeps GitHub repository clean
			
 
				+
			
 
				+   </details>
			
 
				 
			
 
				    <details>
			
 
				    <summary> <strong>👉 Click to expand: WeWork Bot</strong> (Simplest and fastest configuration)</summary>
			
@@ -2041,7 +2142,7 @@ TrendRadar provides two independent Docker images, deploy according to your need
 
				 
			
 
				    # Download docker compose config
			
 
				    wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/.env -P docker/
			
 
				-   wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/docker compose.yml -P docker/
			
 
				+   wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/docker-compose.yml -P docker/
			
 
				    ```
			
 
				 
			
 
				    > 💡 **Note**: Key directory structure required for Docker deployment:
			
@@ -2052,7 +2153,7 @@ current directory/
 
				 │   └── frequency_words.txt
			
 
				 └── docker/
			
 
				     ├── .env
			
 
				-    └── docker compose.yml
			
 
				+    └── docker-compose.yml
			
 
				 ```
			
 
				 
			
 
				 2. **Config File Description**:
			
@@ -2146,7 +2247,7 @@ vim config/frequency_words.txt
 
				 
			
 
				 # Use build version docker compose
			
 
				 cd docker
			
 
				-cp docker compose-build.yml docker compose.yml
			
 
				+cp docker-compose-build.yml docker-compose.yml
			
 
				 ```
			
 
				 
			
 
				 **Build and Start Services**:
			
@@ -2232,7 +2333,7 @@ docker rm trend-radar
 
				 
			
 
				 > 💡 **Web Server Notes**:
			
 
				 > - After starting, access latest report at `http://localhost:8080`
			
 
				-> - Access historical reports via directory navigation (e.g., `http://localhost:8080/2025年xx月xx日/`)
			
 
				+> - Access historical reports via directory navigation (e.g., `http://localhost:8080/2025-xx-xx/`)
			
 
				 > - Port can be configured in `.env` file with `WEBSERVER_PORT` parameter
			
 
				 > - Auto-start: Set `ENABLE_WEBSERVER=true` in `.env`
			
 
				 > - Security: Static files only, limited to output directory, localhost binding only
			
@@ -2249,7 +2350,7 @@ TrendRadar generates daily summary HTML reports to two locations simultaneously:
 
				 |--------------|---------------|----------|
			
 
				 | `output/index.html` | Direct host access | **Docker Deployment** (via Volume mount, visible on host) |
			
 
				 | `index.html` | Root directory access | **GitHub Pages** (repository root, auto-detected by Pages) |
			
 
				-| `output/YYYY年MM月DD日/html/当日汇总.html` | Historical reports | All environments (archived by date) |
			
 
				+| `output/YYYY-MM-DD/html/当日汇总.html` | Historical reports | All environments (archived by date) |
			
 
				 
			
 
				 **Local Access Examples**:
			
 
				 ```bash
			
@@ -2258,8 +2359,8 @@ TrendRadar generates daily summary HTML reports to two locations simultaneously:
 
				 docker exec -it trend-radar python manage.py start_webserver
			
 
				 # 2. Access in browser
			
 
				 http://localhost:8080                           # Access latest report (default index.html)
			
 
				-http://localhost:8080/2025年xx月xx日/            # Access reports for specific date
			
 
				-http://localhost:8080/2025年xx月xx日/html/       # Browse all HTML files for that date
			
 
				+http://localhost:8080/2025-xx-xx/               # Access reports for specific date
			
 
				+http://localhost:8080/2025-xx-xx/html/          # Browse all HTML files for that date
			
 
				 
			
 
				 # Method 2: Direct file access (local environment)
			
 
				 open ./output/index.html             # macOS
			
@@ -2267,7 +2368,7 @@ start ./output/index.html            # Windows
 
				 xdg-open ./output/index.html         # Linux
			
 
				 
			
 
				 # Method 3: Access historical archives
			
 
				-open ./output/2025年xx月xx日/html/当日汇总.html
			
 
				+open ./output/2025-xx-xx/html/当日汇总.html
			
 
				 ```
			
 
				 
			
 
				 **Why two index.html files?**
			
@@ -2324,10 +2425,20 @@ flowchart TB
 
				 Use docker compose to start both news push and MCP services:
			
 
				 
			
 
				 ```bash
			
 
				-# Download latest docker compose.yml (includes MCP service config)
			
 
				-wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/docker compose.yml
			
 
				+# Method 1: Clone project (Recommended)
			
 
				+git clone https://github.com/sansan0/TrendRadar.git
			
 
				+cd TrendRadar/docker
			
 
				+docker compose up -d
			
 
				 
			
 
				-# Start all services
			
 
				+# Method 2: Download docker-compose.yml separately
			
 
				+mkdir trendradar && cd trendradar
			
 
				+wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/docker-compose.yml
			
 
				+wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/.env
			
 
				+mkdir -p config output
			
 
				+# Download config files
			
 
				+wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/config/config.yaml -P config/
			
 
				+wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/config/frequency_words.txt -P config/
			
 
				+# Modify volume paths in docker-compose.yml: ../config -> ./config, ../output -> ./output
			
 
				 docker compose up -d
			
 
				 
			
 
				 # Check running status
			
@@ -2337,18 +2448,29 @@ docker ps | grep trend-radar
 
				 **Start MCP Service Separately**:
			
 
				 
			
 
				 ```bash
			
 
				+# Linux/Mac
			
 
				 docker run -d --name trend-radar-mcp \
			
 
				   -p 127.0.0.1:3333:3333 \
			
 
				-  -v ./config:/app/config:ro \
			
 
				-  -v ./output:/app/output:ro \
			
 
				+  -v $(pwd)/config:/app/config:ro \
			
 
				+  -v $(pwd)/output:/app/output:ro \
			
 
				   -e TZ=Asia/Shanghai \
			
 
				   wantcat/trendradar-mcp:latest
			
 
				+
			
 
				+# Windows PowerShell
			
 
				+docker run -d --name trend-radar-mcp `
			
 
				+  -p 127.0.0.1:3333:3333 `
			
 
				+  -v ${PWD}/config:/app/config:ro `
			
 
				+  -v ${PWD}/output:/app/output:ro `
			
 
				+  -e TZ=Asia/Shanghai `
			
 
				+  wantcat/trendradar-mcp:latest
			
 
				 ```
			
 
				 
			
 
				+> ⚠️ **Note**: Before running, ensure the `config/` and `output/` folders exist in the current directory and contain the config files and news data.
			
 
				+
			
 
				 **Verify Service**:
			
 
				 
			
 
				 ```bash
			
 
				-# Check if MCP service is running properly
			
 
				+# Check MCP service health
			
 
				 curl http://127.0.0.1:3333/mcp
			
 
				 
			
 
				 # View MCP service logs
			
@@ -2357,14 +2479,20 @@ docker logs -f trend-radar-mcp
 
				 
			
 
				 **Configure in AI Clients**:
			
 
				 
			
 
				-After MCP service starts, configure in Claude Desktop, Cherry Studio, Cursor, etc.:
			
 
				+After MCP service starts, configure based on your client:
			
 
				+
			
 
				+**Cherry Studio** (Recommended, GUI config):
			
 
				+- Settings → MCP Server → Add
			
 
				+- Type: `streamableHttp`
			
 
				+- URL: `http://127.0.0.1:3333/mcp`
			
 
				 
			
 
				+**Claude Desktop / Cline** (JSON config):
			
 
				 ```json
			
 
				 {
			
 
				   "mcpServers": {
			
 
				     "trendradar": {
			
 
				       "url": "http://127.0.0.1:3333/mcp",
			
 
				-      "description": "TrendRadar News Trending Analysis"
			
 
				+      "type": "streamableHttp"
			
 
				     }
			
 
				   }
			
 
				 }
			
@@ -2452,7 +2580,6 @@ notification:
 
				       start: "20:00"                  # Start time (Beijing time)
			
 
				       end: "22:00"                    # End time (Beijing time)
			
 
				     once_per_day: true                # Push only once per day
			
 
				-    push_record_retention_days: 7     # Push record retention days
			
 
				 ```
			
 
				 
			
 
				 #### Configuration Details
			
@@ -2463,7 +2590,6 @@ notification:
 
				 | `time_range.start` | string | `"20:00"` | Push window start time (Beijing time, HH:MM format) |
			
 
				 | `time_range.end` | string | `"22:00"` | Push window end time (Beijing time, HH:MM format) |
			
 
				 | `once_per_day` | bool | `true` | `true`=push only once per day within window, `false`=push every execution within window |
			
 
				-| `push_record_retention_days` | int | `7` | Push record retention days (used to determine if already pushed) |
			
 
				 
			
 
				 #### Use Cases
			
 
				 
			
@@ -2487,7 +2613,6 @@ PUSH_WINDOW_ENABLED=true
 
				 PUSH_WINDOW_START=09:00
			
 
				 PUSH_WINDOW_END=18:00
			
 
				 PUSH_WINDOW_ONCE_PER_DAY=false
			
 
				-PUSH_WINDOW_RETENTION_DAYS=7
			
 
				 ```
			
 
				 
			
 
				 #### Complete Configuration Examples
			
@@ -2502,7 +2627,6 @@ notification:
 
				       start: "20:00"
			
 
				       end: "22:00"
			
 
				     once_per_day: true
			
 
				-    push_record_retention_days: 7
			
 
				 ```
			
 
				 
			
 
				 **Scenario: Push every hour during working hours**
			
@@ -2515,7 +2639,6 @@ notification:
 
				       start: "09:00"
			
 
				       end: "18:00"
			
 
				     once_per_day: false
			
 
				-    push_record_retention_days: 7
			
 
				 ```
			
 
				 
			
 
				 </details>
			
@@ -2811,6 +2934,207 @@ notification:
 
				 
			
 
				 <br>
			
 
				 
			
 
				+### 11. Storage Configuration (v4.0.0 New)
			
 
				+
			
 
				+<details>
			
 
				+<summary>👉 Click to expand: <strong>Storage Configuration Guide</strong></summary>
			
 
				+<br>
			
 
				+
			
 
				+#### Storage Backend Selection
			
 
				+
			
 
				+TrendRadar v4.0.0 introduces **multi-backend storage architecture**, supporting automatic backend selection or manual specification:
			
 
				+
			
 
				+| Configuration Value | Description | Applicable Scenarios |
			
 
				+|---------------------|-------------|---------------------|
			
 
				+| `auto` (default) | Auto-select backend: GitHub Actions→R2, other environments→Local | Most users (recommended) |
			
 
				+| `local` | Force use of local SQLite | Docker/local deployment |
			
 
				+| `r2` | Force use of Cloudflare R2 | Cloud storage required |
			
 
				+
			
 
				+**Configuration Location**:
			
 
				+- GitHub Actions: Set `STORAGE_BACKEND` environment variable in GitHub Secrets
			
 
				+- Docker: Configure `STORAGE_BACKEND=local` in `.env` file
			
 
				+- Local: Add `STORAGE_BACKEND` in environment variables or use auto mode
			
 
				+
			
 
				+---
			
 
				+
			
 
				+#### Database Structure Optimization (v4.0.0)
			
 
				+
			
 
				+v4.0.0 made significant optimizations to database structure, removing redundant fields and improving data normalization:
			
 
				+
			
 
				+##### 1. Removed Redundant Fields
			
 
				+
			
 
				+Removed the following redundant fields from `news` table:
			
 
				+
			
 
				+| Field Name | Removal Reason | Alternative |
			
 
				+|------------|----------------|------------|
			
 
				+| `source_name` | Duplicate with platform name | Get via `platforms` table JOIN query |
			
 
				+| `crawl_date` | Duplicate with file path date | Infer from file path timestamp |
			
 
				+
			
 
				+**Migration Notes**: Old databases are incompatible, see [Breaking Changes](#breaking-changes-v400) section
			
 
				+
			
 
				+##### 2. New Platforms Table
			
 
				+
			
 
				+Added `platforms` table for unified management of platform information:
			
 
				+
			
 
				+```sql
			
 
				+CREATE TABLE IF NOT EXISTS platforms (
			
 
				+    id TEXT PRIMARY KEY,     -- Platform ID (immutable, e.g., 'zhihu', 'weibo')
			
 
				+    name TEXT NOT NULL,      -- Platform display name (mutable, e.g., 'Zhihu', 'Weibo')
			
 
				+    enabled INTEGER DEFAULT 1 -- Whether enabled (1=enabled, 0=disabled)
			
 
				+);
			
 
				+```
			
 
				+
			
 
				+**Design Advantages**:
			
 
				+- `id` field is immutable, maintains data consistency
			
 
				+- `name` field is mutable, supports internationalization and customization
			
 
				+- Historical data remains valid when modifying platform names
			
 
				+
			
 
				+##### 3. Crawl Source Status Normalization
			
 
				+
			
 
				+Replaced original comma-separated string storage `successful_sources` field with normalized `crawl_source_status` table:
			
 
				+
			
 
				+```sql
			
 
				+CREATE TABLE IF NOT EXISTS crawl_source_status (
			
 
				+    id INTEGER PRIMARY KEY AUTOINCREMENT,
			
 
				+    file_path TEXT NOT NULL,           -- File path (e.g., 'output/2025-12-09/news.db')
			
 
				+    platform_id TEXT NOT NULL,         -- Platform ID (foreign key to platforms.id)
			
 
				+    success INTEGER NOT NULL,          -- Whether crawl succeeded (1=success, 0=failed)
			
 
				+    crawl_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
			
 
				+    FOREIGN KEY (platform_id) REFERENCES platforms(id)
			
 
				+);
			
 
				+```
			
 
				+
			
 
				+**Design Advantages**:
			
 
				+- Supports efficient SQL queries (e.g., calculate success rate by platform)
			
 
				+- Easy statistics and analysis (no string splitting required)
			
 
				+- Normalized structure, avoids data redundancy
			
 
				+
			
 
				+##### 4. File Path Format Standardization
			
 
				+
			
 
				+**Old Format**: `output/2025年12月09日/news_14-30.txt`
			
 
				+**New Format**: `output/2025-12-09/news.db`
			
 
				+
			
 
				+**Changes**:
			
 
				+- Date format: Chinese format → ISO 8601 standard format
			
 
				+- Filename: Multiple time-stamped TXT files → single SQLite database file
			
 
				+- Extension: `.txt` → `.db`
			
 
				+
			
 
				+**Advantages**:
			
 
				+- Cross-platform compatibility (avoids Chinese path issues)
			
 
				+- Easier programmatic parsing
			
 
				+- International standard, better maintainability
			
 
				+
			
 
				+---
			
 
				+
			
 
				+#### Remote Cloud Storage Configuration
			
 
				+
			
 
				+When using remote cloud storage (required for GitHub Actions environment), configure the following environment variables:
			
 
				+
			
 
				+| Environment Variable | Description | Required | Example Value |
			
 
				+|----------------------|-------------|----------|--------------|
			
 
				+| `S3_BUCKET_NAME` | Bucket name | ✅ Yes | `trendradar-data` |
			
 
				+| `S3_ACCESS_KEY_ID` | Access key ID | ✅ Yes | `abc123...` |
			
 
				+| `S3_SECRET_ACCESS_KEY` | Secret access key | ✅ Yes | `xyz789...` |
			
 
				+| `S3_ENDPOINT_URL` | S3 API endpoint | ✅ Yes | `https://<account-id>.r2.cloudflarestorage.com` |
			
 
				+| `S3_REGION` | Region (optional) | ❌ No | `auto` |
			
 
				+
			
 
				+**Configuration Method**:
			
 
				+- GitHub Actions: Configure in GitHub Secrets (see [Quick Start - Remote Storage Configuration](#2-setup-github-secrets-required--optional-platforms))
			
 
				+- Docker/Local: Configure in `.env` file (remote storage is optional)
			
 
				+
			
 
				+---
			
 
				+
			
 
				+#### Data Cleanup Strategy
			
 
				+
			
 
				+v4.0.0 added automatic data cleanup feature, supporting scheduled cleanup of old data:
			
 
				+
			
 
				+**Configuration Items**: `LOCAL_RETENTION_DAYS` and `REMOTE_RETENTION_DAYS`
			
 
				+
			
 
				+| Configuration Value | Description |
			
 
				+|---------------------|-------------|
			
 
				+| `0` (default) | Disable cleanup, keep all data |
			
 
				+| Positive integer (e.g., `30`) | Only keep recent N days of data, auto-delete old data |
			
 
				+
			
 
				+**Configuration Method**:
			
 
				+```bash
			
 
				+# GitHub Actions: Configure in GitHub Secrets
			
 
				+LOCAL_RETENTION_DAYS=30
			
 
				+REMOTE_RETENTION_DAYS=30
			
 
				+
			
 
				+# Docker: Configure in .env file
			
 
				+LOCAL_RETENTION_DAYS=30
			
 
				+REMOTE_RETENTION_DAYS=30
			
 
				+
			
 
				+# Local: Add to environment variables
			
 
				+export LOCAL_RETENTION_DAYS=30
			
 
				+```
			
 
				+
			
 
				+**Cleanup Rules**:
			
 
				+- Cleanup executes during each crawl task
			
 
				+- Local: Deletes `output/YYYY-MM-DD/` directories older than N days
			
 
				+- Remote: Deletes cloud objects older than N days (e.g., `news/2025-11-10.db`)
			
 
				+
			
 
				+---
			
 
				+
			
 
				+#### Timezone Configuration
			
 
				+
			
 
				+v4.0.0 added timezone configuration support, using IANA standard time zone names:
			
 
				+
			
 
				+**Configuration Item**: `TIMEZONE`
			
 
				+
			
 
				+| Configuration Value | Description | Example |
			
 
				+|---------------------|-------------|---------|
			
 
				+| Not set (default) | Use UTC+0 | - |
			
 
				+| IANA time zone name | Specify time zone | `Asia/Shanghai`, `America/New_York`, `Europe/London` |
			
 
				+
			
 
				+**Configuration Method**:
			
 
				+```bash
			
 
				+# GitHub Actions: Configure in GitHub Secrets
			
 
				+TIMEZONE=Asia/Shanghai
			
 
				+
			
 
				+# Docker: Configure in .env file
			
 
				+TIMEZONE=Asia/Shanghai
			
 
				+
			
 
				+# Local: Add to environment variables
			
 
				+export TIMEZONE=Asia/Shanghai
			
 
				+```
			
 
				+
			
 
				+**Common IANA Time Zones**:
			
 
				+- China: `Asia/Shanghai`
			
 
				+- United States East: `America/New_York`
			
 
				+- United States West: `America/Los_Angeles`
			
 
				+- United Kingdom: `Europe/London`
			
 
				+- Japan: `Asia/Tokyo`
			
 
				+
			
 
				+---
			
 
				+
			
 
				+#### Breaking Changes (v4.0.0)
			
 
				+
			
 
				+**⚠️ Important Notice**: v4.0.0 made breaking changes to database structure, **old databases are incompatible**
			
 
				+
			
 
				+**Impact**:
			
 
				+- Cannot directly read v3.x version data
			
 
				+- Need to re-crawl data to build new database
			
 
				+- **No automatic migration tool provided**
			
 
				+
			
 
				+**Recommendations**:
			
 
				+1. **Fresh Start**: Recommended to start from scratch to accumulate data
			
 
				+2. **Keep Historical Data**: If you need to preserve v3.x historical data, rename the old `output/` directory (e.g., to `output_v3_backup/`) before running the new version
			
 
				+
			
 
				+**Data Format Comparison**:
			
 
				+
			
 
				+| Item | v3.x | v4.0.0 |
			
 
				+|------|------|--------|
			
 
				+| File path format | `output/2025年12月09日/` | `output/2025-12-09/` |
			
 
				+| Data file | Multiple `news_HH-MM.txt` files | Single `news.db` file |
			
 
				+| Database fields | Contains `source_name`, `crawl_date` | Removed redundant fields |
			
 
				+| Platform management | No independent table | Added `platforms` table |
			
 
				+| Crawl status | Comma-separated string | Normalized `crawl_source_status` table |
			
 
				+
			
 
				+</details>
			
 
				+
			
 
				+<br>
			
 
				+
			
 
				 ## 🤖 AI Analysis
			
 
				 
			
 
				 TrendRadar v3.0.0 added **MCP (Model Context Protocol)** based AI analysis feature, allowing natural language conversations with news data for deep analysis.
			
--- a/README-MCP-FAQ-EN.md
+++ b/README-MCP-FAQ-EN.md
@@ -450,7 +450,89 @@ AI: (date_range={"start": "2024-12-01", "end": "2024-12-31"})
 
				 
			
 
				 ---
			
 
				 
			
 
				-### Q14: How to parse natural language date expressions? (Recommended to use first)
			
 
				+## Storage Sync
			
 
				+
			
 
				+### Q14: How to sync data from remote storage to local?
			
 
				+
			
 
				+**You can ask like this:**
			
 
				+
			
 
				+- "Sync last 7 days data from remote"
			
 
				+- "Pull data from remote storage to local"
			
 
				+- "Sync last 30 days of news data"
			
 
				+
			
 
				+**Tool called:** `sync_from_remote`
			
 
				+
			
 
				+**Use cases:**
			
 
				+
			
 
				+- Crawler deployed in the cloud (e.g., GitHub Actions), data stored remotely (e.g., Cloudflare R2)
			
 
				+- MCP Server deployed locally, needs to pull data from remote for analysis
			
 
				+
			
 
				+**Return information:**
			
 
				+
			
 
				+- synced_files: Number of successfully synced files
			
 
				+- synced_dates: List of successfully synced dates
			
 
				+- skipped_dates: Skipped dates (already exist locally)
			
 
				+- failed_dates: Failed dates and error information
			
 
				+
			
 
				+**Prerequisites:**
			
 
				+
			
 
				+Need to configure remote storage in `config/config.yaml` or set environment variables:
			
 
				+- `S3_ENDPOINT_URL`: Service endpoint
			
 
				+- `S3_BUCKET_NAME`: Bucket name
			
 
				+- `S3_ACCESS_KEY_ID`: Access key ID
			
 
				+- `S3_SECRET_ACCESS_KEY`: Secret access key
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### Q15: How to view storage status?
			
 
				+
			
 
				+**You can ask like this:**
			
 
				+
			
 
				+- "View current storage status"
			
 
				+- "What's the storage configuration"
			
 
				+- "How much data is stored locally"
			
 
				+- "Is remote storage configured"
			
 
				+
			
 
				+**Tool called:** `get_storage_status`
			
 
				+
			
 
				+**Return information:**
			
 
				+
			
 
				+| Category | Information |
			
 
				+|----------|-------------|
			
 
				+| **Local Storage** | Data directory, total size, date count, date range |
			
 
				+| **Remote Storage** | Whether configured, endpoint URL, bucket name, date count |
			
 
				+| **Pull Config** | Whether auto-pull enabled, pull days |
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### Q16: How to view available data dates?
			
 
				+
			
 
				+**You can ask like this:**
			
 
				+
			
 
				+- "What dates are available locally"
			
 
				+- "What dates are in remote storage"
			
 
				+- "Compare local and remote data dates"
			
 
				+- "Which dates only exist remotely"
			
 
				+
			
 
				+**Tool called:** `list_available_dates`
			
 
				+
			
 
				+**Three query modes:**
			
 
				+
			
 
				+| Mode | Description | Example Question |
			
 
				+|------|-------------|------------------|
			
 
				+| **local** | View local only | "What dates are available locally" |
			
 
				+| **remote** | View remote only | "What dates are in remote" |
			
 
				+| **both** | Compare both (default) | "Compare local and remote data" |
			
 
				+
			
 
				+**Return information (both mode):**
			
 
				+
			
 
				+- only_local: Dates only existing locally
			
 
				+- only_remote: Dates only existing remotely (useful for deciding which dates to sync)
			
 
				+- both: Dates existing in both places
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### Q17: How to parse natural language date expressions? (Recommended to use first)
			
 
				 
			
 
				 **You can ask like this:**
			
 
				 
			
--- a/README-MCP-FAQ.md
+++ b/README-MCP-FAQ.md
@@ -450,7 +450,89 @@ AI：（date_range={"start": "2024-12-01", "end": "2024-12-31"}）
 
				 
			
 
				 ---
			
 
				 
			
 
				-### Q14: 如何解析自然语言日期表达式？（推荐优先使用）
			
 
				+## 存储同步
			
 
				+
			
 
				+### Q14: 如何从远程存储同步数据到本地？
			
 
				+
			
 
				+**你可以这样问：**
			
 
				+
			
 
				+- "从远程同步最近 7 天的数据"
			
 
				+- "拉取远程存储的数据到本地"
			
 
				+- "同步最近 30 天的新闻数据"
			
 
				+
			
 
				+**调用的工具：** `sync_from_remote`
			
 
				+
			
 
				+**使用场景：**
			
 
				+
			
 
				+- 爬虫部署在云端（如 GitHub Actions），数据存储到远程（如 Cloudflare R2）
			
 
				+- MCP Server 部署在本地，需要从远程拉取数据进行分析
			
 
				+
			
 
				+**返回信息：**
			
 
				+
			
 
				+- synced_files: 成功同步的文件数量
			
 
				+- synced_dates: 成功同步的日期列表
			
 
				+- skipped_dates: 跳过的日期（本地已存在）
			
 
				+- failed_dates: 失败的日期及错误信息
			
 
				+
			
 
				+**前提条件：**
			
 
				+
			
 
				+需要在 `config/config.yaml` 中配置远程存储或设置环境变量：
			
 
				+- `S3_ENDPOINT_URL`: 服务端点
			
 
				+- `S3_BUCKET_NAME`: 存储桶名称
			
 
				+- `S3_ACCESS_KEY_ID`: 访问密钥 ID
			
 
				+- `S3_SECRET_ACCESS_KEY`: 访问密钥
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### Q15: 如何查看存储状态？
			
 
				+
			
 
				+**你可以这样问：**
			
 
				+
			
 
				+- "查看当前存储状态"
			
 
				+- "存储配置是什么"
			
 
				+- "本地有多少数据"
			
 
				+- "远程存储配置了吗"
			
 
				+
			
 
				+**调用的工具：** `get_storage_status`
			
 
				+
			
 
				+**返回信息：**
			
 
				+
			
 
				+| 类别 | 信息 |
			
 
				+|------|------|
			
 
				+| **本地存储** | 数据目录、总大小、日期数量、日期范围 |
			
 
				+| **远程存储** | 是否配置、端点地址、存储桶名称、日期数量 |
			
 
				+| **拉取配置** | 是否启用自动拉取、拉取天数 |
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### Q16: 如何查看可用的数据日期？
			
 
				+
			
 
				+**你可以这样问：**
			
 
				+
			
 
				+- "本地有哪些日期的数据"
			
 
				+- "远程存储有哪些日期"
			
 
				+- "对比本地和远程的数据日期"
			
 
				+- "哪些日期只在远程有"
			
 
				+
			
 
				+**调用的工具：** `list_available_dates`
			
 
				+
			
 
				+**三种查询模式：**
			
 
				+
			
 
				+| 模式 | 说明 | 示例问法 |
			
 
				+|------|------|---------|
			
 
				+| **local** | 仅查看本地 | "本地有哪些日期" |
			
 
				+| **remote** | 仅查看远程 | "远程有哪些日期" |
			
 
				+| **both** | 对比两者（默认） | "对比本地和远程的数据" |
			
 
				+
			
 
				+**返回信息（both 模式）：**
			
 
				+
			
 
				+- only_local: 仅本地存在的日期
			
 
				+- only_remote: 仅远程存在的日期（可用于决定同步哪些日期）
			
 
				+- both: 两边都存在的日期
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### Q17: 如何解析自然语言日期表达式？（推荐优先使用）
			
 
				 
			
 
				 **你可以这样问：**
			
 
				 
			
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 
				 <div align="center" id="trendradar">
			
 
				 
			
 
				-> **📢 公告：** 经与 GitHub 官方沟通，完成合规调整后将恢复"一键 Fork 部署"，请关注 **v4.0.0** 版本的更新
			
 
				+> **📢 公告：** **v4.0.0** 版本已发布！包含存储架构重构、数据库优化、模块化改进等重大更新
			
 
				 
			
 
				 <a href="https://github.com/sansan0/TrendRadar" title="TrendRadar">
			
 
				   <img src="/_image/banner.webp" alt="TrendRadar Banner" width="80%">
			
@@ -16,8 +16,8 @@
 
				 [![GitHub Stars](https://img.shields.io/github/stars/sansan0/TrendRadar?style=flat-square&logo=github&color=yellow)](https://github.com/sansan0/TrendRadar/stargazers)
			
 
				 [![GitHub Forks](https://img.shields.io/github/forks/sansan0/TrendRadar?style=flat-square&logo=github&color=blue)](https://github.com/sansan0/TrendRadar/network/members)
			
 
				 [![License](https://img.shields.io/badge/license-GPL--3.0-blue.svg?style=flat-square)](LICENSE)
			
 
				-[![Version](https://img.shields.io/badge/version-v3.5.0-blue.svg)](https://github.com/sansan0/TrendRadar)
			
 
				-[![MCP](https://img.shields.io/badge/MCP-v1.0.3-green.svg)](https://github.com/sansan0/TrendRadar)
			
 
				+[![Version](https://img.shields.io/badge/version-v4.0.0-blue.svg)](https://github.com/sansan0/TrendRadar)
			
 
				+[![MCP](https://img.shields.io/badge/MCP-v1.1.0-green.svg)](https://github.com/sansan0/TrendRadar)
			
 
				 
			
 
				 [![企业微信通知](https://img.shields.io/badge/企业微信-通知-00D4AA?style=flat-square)](https://work.weixin.qq.com/)
			
 
				 [![个人微信通知](https://img.shields.io/badge/个人微信-通知-00D4AA?style=flat-square)](https://weixin.qq.com/)
			
@@ -48,62 +48,61 @@
 
				 <br>
			
 
				 
			
 
				 <details>
			
 
				-<summary>🚨 <strong>【必读】重要公告：本项目的正确部署姿势</strong></summary>
			
 
				+<summary>🚨 <strong>【必读】重要公告：v4.0.0 部署方式与存储架构变更</strong></summary>
			
 
				 
			
 
				 <br>
			
 
				 
			
 
				-> **⚠️ 2025年12月紧急通知**
			
 
				->
			
 
				-> 由于 Fork 数量激增导致 GitHub 服务器压力过大，**GitHub Actions 及 GitHub Pages 部署目前已受限**。为确保顺利部署，请务必阅读以下说明。
			
 
				+### 🛠️ 请选择适合你的部署方式
			
 
				+
			
 
				+#### 🅰️ 方案一：Docker 部署（推荐 🔥）
			
 
				 
			
 
				-### 1. ✅ 唯一推荐部署方式：Docker
			
 
				+* **特点**：最稳定、最简单，数据存储在 **本地 SQLite**，完全自主可控。
			
 
				 
			
 
				-**这是目前最稳定、不受 GitHub 限制的方案。** 数据存储在本地，不会因为 GitHub 策略调整而失效。
			
 
				+* **适用**：有自己的服务器、NAS 或长期运行的电脑。
			
 
				 
			
 
				 * 👉 [跳转到 Docker 部署教程](#6-docker-部署)
			
 
				 
			
 
				 ---
			
 
				 
			
 
				-### 2. 如果你本打算 Fork 本项目...
			
 
				+#### 🅱️ 方案二：GitHub Actions 部署（已恢复 ✅）
			
 
				 
			
 
				-为了减少对 GitHub 服务器的压力，**请千万不要直接点击 "Fork" 按钮！**
			
 
				+* **特点**：数据不再直接写入仓库（Git Commit），而是存储在 **远程云存储**（支持 S3 兼容协议：Cloudflare R2、阿里云 OSS、腾讯云 COS 等）。
			
 
				 
			
 
				-请务必使用 **"Use this template"** 功能来替代 Fork：
			
 
				+* **门槛**：**必须**配置一个 S3 兼容的对象存储服务（推荐免费的 Cloudflare R2）。
			
 
				 
			
 
				-1.  **点击**原仓库页面右上角的绿色的 **[Use this template]** 按钮。
			
 
				-2.  **选择** "Create a new repository"。
			
 
				+> **⚠️ 注意**：选择此方案，请务必执行以下两步配置：
			
 
				 
			
 
				-**为什么要这样做？**
			
 
				-* **❌ Fork**：复制完整历史记录，大量 Fork 同时运行会触发 GitHub 风控。
			
 
				-* **✅ Use this template**：创建的是一个全新的独立仓库，没有历史包袱，对服务器更友好。
			
 
				+#### 1. 🚀 推荐的开始方式：Use this template
			
 
				 
			
 
				----
			
 
				+为了保持仓库整洁，避免继承冗余的历史记录，我**建议**你使用 Template 模式：
			
 
				 
			
 
				-### 3. 关于新版数据存储的说明
			
 
				+1.  **点击**原仓库页面右上角的绿色 **[Use this template]** 按钮。
			
 
				 
			
 
				-新版将使用 **Cloudflare R2** 存储新闻数据，以保证持久化。
			
 
				+2.  **选择** "Create a new repository"。
			
 
				 
			
 
				-**⚠️ 配置前置条件：**
			
 
				+> **💡 为什么要这样做？**
			
 
				+> * **Use this template**：创建一个全新的、干净的仓库，没有历史包袱。
			
 
				+> * **Fork**：会保留完整的提交历史和关联关系，占用 GitHub 更多资源。
			
 
				 
			
 
				-根据 Cloudflare 平台规则，开通 R2 需绑定支付方式。
			
 
				+#### 2. ☁️ 关于 GitHub Actions 必配的远程存储
			
 
				 
			
 
				-- **目的：** 仅作身份验证（Verify Only），不产生扣费。
			
 
				-- **支付：** 支持信用卡或国区 PayPal。
			
 
				-- **用量：** R2 的免费额度足以覆盖本项目日常运行，无需付费。
			
 
				+如果你选择 **方案二 (GitHub Actions)**，则必须配置一个 S3 兼容的对象存储服务。
			
 
				 
			
 
				----
			
 
				+**支持的存储服务：**
			
 
				+- **Cloudflare R2**（推荐，免费额度充足）
			
 
				+- 其他 S3 兼容服务
			
 
				 
			
 
				-### 4. 📅 后续计划与文档阅读说明
			
 
				+**⚠️ 以 Cloudflare R2 为例的配置前置条件：**
			
 
				 
			
 
				-> **后续计划：**
			
 
				-> - 探索新方案：保留 Actions 用于抓取和推送，但不再将数据保存到仓库，改用外部存储。
			
 
				+根据 Cloudflare 平台规则，开通 R2 需绑定支付方式。
			
 
				 
			
 
				-**⚠️ 阅读注意：**
			
 
				-鉴于上述计划意味着 **Fork 部署模式未来可能会以新形式回归**，且当前全面修改文档工作量巨大，我们暂时保留了旧版描述。
			
 
				+* **目的**：仅作身份验证（Verify Only），**不产生扣费**。
			
 
				 
			
 
				-**在当前阶段，若后续教程中仍出现 "Fork" 相关表述，请一律忽略或将其理解为 "Use this template"**。
			
 
				+* **支付**：支持双币信用卡或国区 PayPal。
			
 
				 
			
 
				-👉 **[点击此处查看 TrendRadar 最新官方文档](https://github.com/sansan0/TrendRadar?tab=readme-ov-file)**
			
 
				+* **用量**：R2 的免费额度（10GB存储/月）足以覆盖本项目日常运行，无需担心付费。
			
 
				+
			
 
				+👉 **[点击查看详细配置教程](#-快速开始)**
			
 
				 
			
 
				 </details>
			
 
				 
			
@@ -335,10 +334,30 @@
 
				 - ⚠️ **配对配置**：Telegram 和 ntfy 需要保证配对参数数量一致（如 token 和 chat_id 都是 2 个）
			
 
				 - ⚠️ **数量限制**：默认每个渠道最多 3 个账号，超出会被截断
			
 
				 
			
 
				-### **多端适配**
			
 
				-- **GitHub Pages**：自动生成精美网页报告，PC/移动端适配
			
 
				-- **Docker部署**：支持多架构容器化运行
			
 
				-- **数据持久化**：HTML/TXT多格式历史记录保存
			
 
				+### **灵活存储架构**（v4.0.0 重大更新）
			
 
				+
			
 
				+**多存储后端支持**：
			
 
				+- ☁️ **远程云存储**：GitHub Actions 环境默认，支持 S3 兼容协议（R2/OSS/COS 等），数据存储在云端，不污染仓库
			
 
				+- 💾 **本地 SQLite 数据库**：Docker/本地环境默认，数据完全可控
			
 
				+- 🔄 **自动后端选择**：根据运行环境智能切换存储方式
			
 
				+
			
 
				+**数据格式**：
			
 
				+| 格式 | 用途 | 说明 |
			
 
				+|------|------|------|
			
 
				+| **SQLite** | 主存储 | 单文件数据库，查询快速，支持 MCP AI 分析 |
			
 
				+| **TXT** | 可选快照 | 可读文本格式，方便直接查看 |
			
 
				+| **HTML** | 报告展示 | 精美可视化页面，PC/移动端适配 |
			
 
				+
			
 
				+**数据管理**：
			
 
				+- ✅ 自动清理过期数据（可配置保留天数）
			
 
				+- ✅ 时区配置支持（全球时区）
			
 
				+
			
 
				+> 💡 详细说明见 [配置详解 - 存储配置](#9-存储配置)
			
 
				+
			
 
				+### **多端部署**
			
 
				+- **GitHub Actions**：定时自动爬取 + 远程云存储（需签到续期）
			
 
				+- **Docker 部署**：支持多架构容器化运行，数据本地存储
			
 
				+- **本地运行**：Windows/Mac/Linux 直接运行
			
 
				 
			
 
				 
			
 
				 ### **AI 智能分析（v3.0.0 新增）**
			
@@ -389,10 +408,34 @@ GitHub 一键 Fork 即可使用，无需编程基础。
 
				 >**升级说明**：
			
 
				 - **📌 查看最新更新**：**[原仓库更新日志](https://github.com/sansan0/TrendRadar?tab=readme-ov-file#-更新日志)**
			
 
				 - **提示**：不要通过 **Sync fork** 更新本项目，建议查看【历史更新】，明确具体的【升级方式】和【功能内容】
			
 
				-- **小版本更新**：从 v2.x 升级到 v2.y，用本项目的 `main.py` 代码替换你 fork 仓库中的对应文件
			
 
				 - **大版本升级**：从 v1.x 升级到 v2.y，建议删除现有 fork 后重新 fork，这样更省力且避免配置冲突
			
 
				 
			
 
				 
			
 
				+### 2025/12/13 - v4.0.0
			
 
				+
			
 
				+**🎉 重大更新：全面重构存储和核心架构**
			
 
				+
			
 
				+- **多存储后端支持**：引入全新的存储模块，支持本地 SQLite 和远程云存储（S3 兼容协议，推荐免费的 Cloudflare R2），适应 GitHub Actions、Docker 和本地环境。
			
 
				+- **数据库结构优化**：重构 SQLite 数据库表结构，提升数据效率和查询能力。
			
 
				+- **核心代码模块化**：将主程序逻辑拆分为 trendradar 包的多个模块，显著提升代码可维护性。
			
 
				+- **增强功能**：实现日期格式标准化、数据保留策略、时区配置支持、时间显示优化，并修复远程存储数据持久化问题，确保数据合并的准确性。
			
 
				+- **清理和兼容**：移除了大部分历史兼容代码，统一了数据存储和读取方式。
			
 
				+
			
 
				+
			
 
				+### 2025/12/13 - mcp-v1.1.0
			
 
				+
			
 
				+  **MCP 模块更新:**
			
 
				+  - 适配 v4.0.0，同时也兼容 v3.x 的数据
			
 
				+  - 新增存储同步工具：
			
 
				+    - `sync_from_remote`: 从远程存储拉取数据到本地
			
 
				+    - `get_storage_status`: 获取存储配置和状态
			
 
				+    - `list_available_dates`: 列出本地/远程可用日期范围
			
 
				+
			
 
				+
			
 
				+<details>
			
 
				+<summary>👉 点击展开：<strong>历史更新</strong></summary>
			
 
				+
			
 
				+
			
 
				 ### 2025/12/03 - v3.5.0
			
 
				 
			
 
				 **🎉 核心功能增强**
			
@@ -456,10 +499,6 @@ GitHub 一键 Fork 即可使用，无需编程基础。
 
				   - 工具总数从 13 个增加到 14 个
			
 
				 
			
 
				 
			
 
				-<details>
			
 
				-<summary>👉 点击展开：<strong>历史更新</strong></summary>
			
 
				-
			
 
				-
			
 
				 ### 2025/11/28 - v3.4.1
			
 
				 
			
 
				 **🔧 格式优化**
			
@@ -857,11 +896,44 @@ frequency_words.txt 文件增加了一个【必须词】功能，使用 + 号
 
				 
			
 
				 > **📖 提醒**：Fork 用户建议先 **[查看最新官方文档](https://github.com/sansan0/TrendRadar?tab=readme-ov-file)**，确保配置步骤是最新的。
			
 
				 
			
 
				+### ⚠️ GitHub Actions 使用说明
			
 
				+
			
 
				+**v4.0.0 重要变更**：引入「活跃度检测」机制，GitHub Actions 需定期签到以维持运行。
			
 
				+
			
 
				+#### 🔄 签到续期机制
			
 
				+
			
 
				+- **运行周期**：有效期为 **7 天**，倒计时结束后服务将自动挂起。
			
 
				+- **续期方式**：在 Actions 页面手动触发 "Check In" workflow，即可重置 7 天有效期。
			
 
				+- **操作路径**：`Actions` → `Check In` → `Run workflow`
			
 
				+- **设计理念**：
			
 
				+    - 如果 7 天都忘了签到，或许这些资讯对你来说并非刚需。适时的暂停，能帮你从信息流中抽离，给大脑留出喘息的空间。
			
 
				+    - GitHub Actions 是宝贵的公共计算资源。引入签到机制旨在避免算力的无效空转，确保资源能分配给真正活跃且需要的用户。感谢你的理解与支持。
			
 
				+
			
 
				+#### 📦 数据存储（必需配置）
			
 
				+
			
 
				+GitHub Actions 环境下，数据存储在 **远程云存储**（支持 S3 兼容协议，推荐免费的 Cloudflare R2），不会污染仓库（见下方 **必需配置：远程云存储**）
			
 
				+
			
 
				+#### 🚀 推荐：Docker 部署
			
 
				+
			
 
				+如需长期稳定运行，建议使用 [Docker 部署](#6-docker-部署)，数据存储在本地，无需签到，不过需要额外付费购买云服务器。
			
 
				+
			
 
				+<br>
			
 
				+
			
 
				+> 🎉 **已支持：多云存储方案**
			
 
				+>
			
 
				+> 本项目现已支持 S3 兼容协议，你可以选择：
			
 
				+> - **Cloudflare R2**（推荐，免费额度充足）
			
 
				+> - 其他 S3 兼容存储服务
			
 
				+>
			
 
				+> 只需配置对应的 `S3_ENDPOINT_URL`、`S3_BUCKET_NAME` 等环境变量即可切换。
			
 
				+
			
 
				+---
			
 
				+
			
 
				 1. **Fork 本项目**到你的 GitHub 账户
			
 
				 
			
 
				    - 点击本页面右上角的"Fork"按钮
			
 
				 
			
 
				-2. **设置 GitHub Secrets（选择你需要的平台）**:
			
 
				+2. **设置 GitHub Secrets（必需 + 可选平台）**:
			
 
				 
			
 
				    在你 Fork 后的仓库中，进入 `Settings` > `Secrets and variables` > `Actions` > `New repository secret`
			
 
				 
			
@@ -900,6 +972,53 @@ frequency_words.txt 文件增加了一个【必须词】功能，使用 + 号
 
				 
			
 
				    <br>
			
 
				 
			
 
				+   <details>
			
 
				+   <summary>⚠️ <strong>必需配置：远程云存储</strong>（GitHub Actions 环境必需，推荐 Cloudflare R2）</summary>
			
 
				+   <br>
			
 
				+
			
 
				+    **GitHub Secret 配置（⚠️ 以下 4 个配置项都是必需的）：**
			
 
				+
			
 
				+    | Name（名称） | Secret（值）说明 |
			
 
				+    |-------------|-----------------|
			
 
				+    | `S3_BUCKET_NAME` | 存储桶名称（如 `trendradar-data`） |
			
 
				+    | `S3_ACCESS_KEY_ID` | 访问密钥 ID（Access Key ID） |
			
 
				+    | `S3_SECRET_ACCESS_KEY` | 访问密钥（Secret Access Key） |
			
 
				+    | `S3_ENDPOINT_URL` | S3 API 端点（如 R2：`https://<account-id>.r2.cloudflarestorage.com`） |
			
 
				+
			
 
				+    <br>
			
 
				+
			
 
				+    **如何获取凭据（以 Cloudflare R2 为例）：**
			
 
				+
			
 
				+    1. **进入 R2 概览**：
			
 
				+    - 登录 [Cloudflare Dashboard](https://dash.cloudflare.com/)。
			
 
				+    - 在左侧侧边栏找到并点击 `R2对象存储`。
			
 
				+
			
 
				+    <br>
			
 
				+
			
 
				+    2. **创建存储桶**：
			
 
				+    - 点击`概述`
			
 
				+    - 点击右上角的 `创建存储桶` (Create bucket)。
			
 
				+    - 输入名称（例如 `trendradar-data`），点击 `创建存储桶`。
			
 
				+
			
 
				+    <br>
			
 
				+
			
 
				+    3. **创建 API 令牌**：
			
 
				+    - 回到 **概述**页面。
			
 
				+    - 点击**右下角** `Account Details` 找到并点击 `Manage` (Manage R2 API Tokens)。
			
 
				+    - 同时你会看到 `S3 API`：`https://<account-id>.r2.cloudflarestorage.com`(这就是 S3_ENDPOINT_URL)
			
 
				+    - 点击 `创建 Account API 令牌`。
			
 
				+    - **⚠️ 关键设置**：
			
 
				+        - **令牌名称**：随意填写（如 `github-action-write`）。
			
 
				+        - **权限**：选择 `管理员读和写` 。
			
 
				+        - **指定存储桶**：为了安全，建议选择 `仅适用于指定存储桶` 并选中你的桶（如 `trendradar-data`）。
			
 
				+    - 点击 `创建 API 令牌`，**立即复制** 显示的 `Access Key ID` 和 `Secret Access Key`（只显示一次！）。
			
 
				+
			
 
				+    <br>
			
 
				+
			
 
				+    - **R2 免费额度**：每月 10GB 存储 + 100万次读取，对本项目来说非常充足。
			
 
				+    - **支付验证**：开通 R2 即使是免费额度，Cloudflare 也要求绑定 PayPal 或信用卡进行身份验证（不会实际扣费，除非超过额度）。
			
 
				+
			
 
				+   </details>
			
 
				 
			
 
				    <details>
			
 
				    <summary>👉 点击展开：<strong>企业微信机器人</strong>（配置最简单最迅速）</summary>
			
@@ -1489,10 +1608,11 @@ frequency_words.txt 文件增加了一个【必须词】功能，使用 + 号
 
				 
			
 
				    **测试步骤**：
			
 
				    1. 进入你项目的 Actions 页面
			
 
				-   2. 找到 **"Hot News Crawler"** 点进去
			
 
				+   2. 找到 **"Get Hot News"**（必须是这个名称）点进去，点击右侧的 **"Run workflow"** 按钮运行
			
 
				       - 如果看不到该字样，参照 [#109](https://github.com/sansan0/TrendRadar/issues/109) 解决
			
 
				-   3. 点击右侧的 **"Run workflow"** 按钮运行
			
 
				-   4. 等待 1 分钟左右，消息会推送到你配置的平台
			
 
				+   3. 等待 3 分钟左右，消息会推送到你配置的平台
			
 
				+
			
 
				+   <br>
			
 
				 
			
 
				    > ⏱️ **测试提示**：
			
 
				    > - 手动测试不要太频繁，避免触发 GitHub Actions 限制
			
@@ -2069,7 +2189,7 @@ TrendRadar 提供两个独立的 Docker 镜像，可根据需求选择部署：
 
				 
			
 
				    # 下载 docker compose 配置
			
 
				    wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/.env  -P docker/
			
 
				-   wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/docker compose.yml  -P docker/
			
 
				+   wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/docker-compose.yml  -P docker/
			
 
				    ```
			
 
				 
			
 
				    > 💡 **说明**：Docker 部署需要的关键目录结构如下：
			
@@ -2080,7 +2200,7 @@ TrendRadar 提供两个独立的 Docker 镜像，可根据需求选择部署：
 
				 │   └── frequency_words.txt
			
 
				 └── docker/
			
 
				     ├── .env
			
 
				-    └── docker compose.yml
			
 
				+    └── docker-compose.yml
			
 
				 ```
			
 
				 
			
 
				 2. **配置文件说明**:
			
@@ -2174,7 +2294,7 @@ vim config/frequency_words.txt
 
				 
			
 
				 # 使用构建版本的 docker compose
			
 
				 cd docker
			
 
				-cp docker compose-build.yml docker compose.yml
			
 
				+cp docker-compose-build.yml docker-compose.yml
			
 
				 ```
			
 
				 
			
 
				 **构建并启动服务**：
			
@@ -2260,7 +2380,7 @@ docker rm trend-radar
 
				 
			
 
				 > 💡 **Web 服务器说明**：
			
 
				 > - 启动后可通过浏览器访问 `http://localhost:8080` 查看最新报告
			
 
				-> - 通过目录导航访问历史报告（如：`http://localhost:8080/2025年xx月xx日/`）
			
 
				+> - 通过目录导航访问历史报告（如：`http://localhost:8080/2025-xx-xx/`）
			
 
				 > - 端口可在 `.env` 文件中配置 `WEBSERVER_PORT` 参数
			
 
				 > - 自动启动：在 `.env` 中设置 `ENABLE_WEBSERVER=true`
			
 
				 > - 安全提示：仅提供静态文件访问，限制在 output 目录，只绑定本地访问
			
@@ -2277,7 +2397,7 @@ TrendRadar 生成的当日汇总 HTML 报告会同时保存到两个位置：
 
				 |---------|---------|---------|
			
 
				 | `output/index.html` | 宿主机直接访问 | **Docker 部署**（通过 Volume 挂载，宿主机可见） |
			
 
				 | `index.html` | 根目录访问 | **GitHub Pages**（仓库根目录，Pages 自动识别） |
			
 
				-| `output/YYYY年MM月DD日/html/当日汇总.html` | 历史报告访问 | 所有环境（按日期归档） |
			
 
				+| `output/YYYY-MM-DD/html/当日汇总.html` | 历史报告访问 | 所有环境（按日期归档） |
			
 
				 
			
 
				 **本地访问示例**：
			
 
				 ```bash
			
@@ -2286,8 +2406,8 @@ TrendRadar 生成的当日汇总 HTML 报告会同时保存到两个位置：
 
				 docker exec -it trend-radar python manage.py start_webserver
			
 
				 # 2. 在浏览器访问
			
 
				 http://localhost:8080                           # 访问最新报告（默认 index.html）
			
 
				-http://localhost:8080/2025年xx月xx日/            # 访问指定日期的报告
			
 
				-http://localhost:8080/2025年xx月xx日/html/       # 浏览该日期下的所有 HTML 文件
			
 
				+http://localhost:8080/2025-xx-xx/               # 访问指定日期的报告
			
 
				+http://localhost:8080/2025-xx-xx/html/          # 浏览该日期下的所有 HTML 文件
			
 
				 
			
 
				 # 方式 2：直接打开文件（本地环境）
			
 
				 open ./output/index.html             # macOS
			
@@ -2295,7 +2415,7 @@ start ./output/index.html            # Windows
 
				 xdg-open ./output/index.html         # Linux
			
 
				 
			
 
				 # 方式 3：访问历史归档
			
 
				-open ./output/2025年xx月xx日/html/当日汇总.html
			
 
				+open ./output/2025-xx-xx/html/当日汇总.html
			
 
				 ```
			
 
				 
			
 
				 **为什么有两个 index.html？**
			
@@ -2349,34 +2469,42 @@ flowchart TB
 
				 
			
 
				 **快速启动**：
			
 
				 
			
 
				-使用 docker compose 同时启动新闻推送和 MCP 服务：
			
 
				+如果已按照 [方式一：使用 docker compose](#方式一使用-docker-compose推荐) 完成部署，只需启动 MCP 服务：
			
 
				 
			
 
				 ```bash
			
 
				-# 下载最新的 docker compose.yml（已包含 MCP 服务配置）
			
 
				-wget https://raw.githubusercontent.com/sansan0/TrendRadar/master/docker/docker compose.yml
			
 
				-
			
 
				-# 启动所有服务
			
 
				-docker compose up -d
			
 
				+cd TrendRadar/docker
			
 
				+docker compose up -d trend-radar-mcp
			
 
				 
			
 
				 # 查看运行状态
			
 
				-docker ps | grep trend-radar
			
 
				+docker ps | grep trend-radar-mcp
			
 
				 ```
			
 
				 
			
 
				-**单独启动 MCP 服务**：
			
 
				+**单独启动 MCP 服务**（不使用 docker compose）：
			
 
				 
			
 
				 ```bash
			
 
				+# Linux/Mac
			
 
				 docker run -d --name trend-radar-mcp \
			
 
				   -p 127.0.0.1:3333:3333 \
			
 
				-  -v ./config:/app/config:ro \
			
 
				-  -v ./output:/app/output:ro \
			
 
				+  -v $(pwd)/config:/app/config:ro \
			
 
				+  -v $(pwd)/output:/app/output:ro \
			
 
				   -e TZ=Asia/Shanghai \
			
 
				   wantcat/trendradar-mcp:latest
			
 
				+
			
 
				+# Windows PowerShell
			
 
				+docker run -d --name trend-radar-mcp `
			
 
				+  -p 127.0.0.1:3333:3333 `
			
 
				+  -v ${PWD}/config:/app/config:ro `
			
 
				+  -v ${PWD}/output:/app/output:ro `
			
 
				+  -e TZ=Asia/Shanghai `
			
 
				+  wantcat/trendradar-mcp:latest
			
 
				 ```
			
 
				 
			
 
				+> ⚠️ **注意**：单独运行时，确保当前目录下有 `config/` 和 `output/` 文件夹，且包含配置文件和新闻数据。
			
 
				+
			
 
				 **验证服务**：
			
 
				 
			
 
				 ```bash
			
 
				-# 检查 MCP 服务是否正常运行
			
 
				+# 检查 MCP 服务健康状态
			
 
				 curl http://127.0.0.1:3333/mcp
			
 
				 
			
 
				 # 查看 MCP 服务日志
			
@@ -2385,14 +2513,20 @@ docker logs -f trend-radar-mcp
 
				 
			
 
				 **在 AI 客户端中配置**：
			
 
				 
			
 
				-MCP 服务启动后，在 Claude Desktop、Cherry Studio、Cursor 等客户端中配置：
			
 
				+MCP 服务启动后，根据不同客户端进行配置：
			
 
				 
			
 
				+**Cherry Studio**（推荐，GUI 配置）：
			
 
				+- 设置 → MCP 服务器 → 添加
			
 
				+- 类型：`streamableHttp`
			
 
				+- URL：`http://127.0.0.1:3333/mcp`
			
 
				+
			
 
				+**Claude Desktop / Cline**（JSON 配置）：
			
 
				 ```json
			
 
				 {
			
 
				   "mcpServers": {
			
 
				     "trendradar": {
			
 
				       "url": "http://127.0.0.1:3333/mcp",
			
 
				-      "description": "TrendRadar 新闻热点分析"
			
 
				+      "type": "streamableHttp"
			
 
				     }
			
 
				   }
			
 
				 }
			
@@ -2480,7 +2614,6 @@ notification:
 
				       start: "20:00"                  # 开始时间（北京时间）
			
 
				       end: "22:00"                    # 结束时间（北京时间）
			
 
				     once_per_day: true                # 每天只推送一次
			
 
				-    push_record_retention_days: 7     # 推送记录保留天数
			
 
				 ```
			
 
				 
			
 
				 #### 配置项详解
			
@@ -2491,7 +2624,6 @@ notification:
 
				 | `time_range.start` | string | `"20:00"` | 推送时间窗口开始时间（北京时间，HH:MM 格式） |
			
 
				 | `time_range.end` | string | `"22:00"` | 推送时间窗口结束时间（北京时间，HH:MM 格式） |
			
 
				 | `once_per_day` | bool | `true` | `true`=每天在窗口内只推送一次，`false`=窗口内每次执行都推送 |
			
 
				-| `push_record_retention_days` | int | `7` | 推送记录保留天数（用于判断是否已推送） |
			
 
				 
			
 
				 #### 使用场景
			
 
				 
			
@@ -2515,7 +2647,6 @@ PUSH_WINDOW_ENABLED=true
 
				 PUSH_WINDOW_START=09:00
			
 
				 PUSH_WINDOW_END=18:00
			
 
				 PUSH_WINDOW_ONCE_PER_DAY=false
			
 
				-PUSH_WINDOW_RETENTION_DAYS=7
			
 
				 ```
			
 
				 
			
 
				 #### 完整配置示例
			
@@ -2530,7 +2661,6 @@ notification:
 
				       start: "20:00"
			
 
				       end: "22:00"
			
 
				     once_per_day: true
			
 
				-    push_record_retention_days: 7
			
 
				 ```
			
 
				 
			
 
				 **场景：工作时间内每小时推送**
			
@@ -2543,7 +2673,6 @@ notification:
 
				       start: "09:00"
			
 
				       end: "18:00"
			
 
				     once_per_day: false
			
 
				-    push_record_retention_days: 7
			
 
				 ```
			
 
				 
			
 
				 </details>
			
@@ -2829,6 +2958,123 @@ notification:
 
				 
			
 
				 </details>
			
 
				 
			
 
				+### 11. 存储配置
			
 
				+
			
 
				+<details id="storage-config">
			
 
				+<summary>👉 点击展开：<strong>存储架构配置详解</strong></summary>
			
 
				+<br>
			
 
				+
			
 
				+#### 存储后端选择
			
 
				+
			
 
				+**配置位置**：`config/config.yaml` 的 `storage` 部分
			
 
				+
			
 
				+v4.0.0 版本重构了存储架构，支持多种存储后端：
			
 
				+
			
 
				+```yaml
			
 
				+storage:
			
 
				+  backend: auto  # 存储后端：auto（自动选择）/ local（本地SQLite）/ remote（远程云存储）
			
 
				+
			
 
				+  formats:
			
 
				+    sqlite: true   # 是否启用SQLite存储
			
 
				+    txt: true      # 是否生成TXT快照
			
 
				+    html: true     # 是否生成HTML报告
			
 
				+
			
 
				+  local:
			
 
				+    data_dir: "output"    # 本地存储目录
			
 
				+    retention_days: 0     # 本地数据保留天数，0表示永久保留
			
 
				+
			
 
				+  remote:
			
 
				+    endpoint_url: ""      # S3 API 端点
			
 
				+    bucket_name: ""       # 存储桶名称
			
 
				+    access_key_id: ""     # 访问密钥ID
			
 
				+    secret_access_key: "" # 访问密钥
			
 
				+    region: ""            # 区域（可选）
			
 
				+    retention_days: 0     # 远程数据保留天数，0表示永久保留
			
 
				+
			
 
				+  pull:
			
 
				+    enabled: false        # 是否启用启动时从远程拉取数据
			
 
				+    days: 7               # 拉取最近N天的数据
			
 
				+```
			
 
				+
			
 
				+#### 后端选择策略
			
 
				+
			
 
				+| backend 值 | 说明 | 适用场景 |
			
 
				+|-----------|------|---------|
			
 
				+| `auto` | **自动选择**（推荐） | 根据运行环境智能选择：<br>• GitHub Actions → Remote<br>• Docker/本地 → Local |
			
 
				+| `local` | 本地 SQLite 数据库 | Docker 部署、本地开发 |
			
 
				+| `remote` | 远程云存储（S3 兼容，如 Cloudflare R2） | GitHub Actions、多机器同步 |
			
 
				+
			
 
				+
			
 
				+#### 远程云存储配置
			
 
				+
			
 
				+**环境变量**（推荐方式）：
			
 
				+
			
 
				+```bash
			
 
				+# GitHub Actions / Docker 环境变量
			
 
				+STORAGE_BACKEND=remote  # 或 auto
			
 
				+
			
 
				+# 本地/远程数据保留天数（0 表示永久保留）
			
 
				+LOCAL_RETENTION_DAYS=0
			
 
				+REMOTE_RETENTION_DAYS=0
			
 
				+
			
 
				+# S3 兼容存储配置（以 Cloudflare R2 为例）
			
 
				+S3_BUCKET_NAME=your-bucket-name
			
 
				+S3_ACCESS_KEY_ID=your-access-key-id
			
 
				+S3_SECRET_ACCESS_KEY=your-secret-access-key
			
 
				+S3_ENDPOINT_URL=https://<account-id>.r2.cloudflarestorage.com
			
 
				+S3_REGION=auto
			
 
				+
			
 
				+# 数据拉取配置（可选，从远程同步到本地）
			
 
				+PULL_ENABLED=false
			
 
				+PULL_DAYS=7
			
 
				+```
			
 
				+
			
 
				+**获取凭据**：参见 [快速开始 - 远程存储配置](#-快速开始)
			
 
				+
			
 
				+#### 数据清理策略
			
 
				+
			
 
				+**自动清理**：每次运行结束时检查并删除超过保留天数的数据。
			
 
				+
			
 
				+```yaml
			
 
				+storage:
			
 
				+  local:
			
 
				+    retention_days: 30  # 本地保留最近30天数据
			
 
				+  remote:
			
 
				+    retention_days: 30  # 远程保留最近30天数据
			
 
				+```
			
 
				+
			
 
				+**清理逻辑**：
			
 
				+- 本地存储：删除过期日期的文件夹（如 `output/2025-11-10/`）
			
 
				+- 远程存储：批量删除过期的云端对象（如 `news/2025-11-10.db`）
			
 
				+
			
 
				+#### 时区配置（v4.0.0 新增）
			
 
				+
			
 
				+**全球时区支持**：解决非中国用户推送时间窗口问题。
			
 
				+
			
 
				+```yaml
			
 
				+app:
			
 
				+  timezone: "Asia/Shanghai"  # 默认中国时区
			
 
				+  # 其他示例：
			
 
				+  # timezone: "America/Los_Angeles"  # 美西时间
			
 
				+  # timezone: "Europe/London"        # 英国时间
			
 
				+```
			
 
				+
			
 
				+**支持所有 IANA 时区名称**：[时区列表](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)
			
 
				+
			
 
				+
			
 
				+#### 不兼容变更
			
 
				+
			
 
				+⚠️ **v4.0.0 不兼容 v3.x 数据**：
			
 
				+
			
 
				+1. 数据库结构完全重构，无法读取旧数据
			
 
				+2. 文件路径格式变更（ISO 格式）
			
 
				+
			
 
				+**迁移建议**：
			
 
				+- 从 v4.0.0 开始重新收集数据
			
 
				+- 旧数据如需保留，请手动重命名目录格式（不推荐）
			
 
				+
			
 
				+</details>
			
 
				+
			
 
				 <br>
			
 
				 
			
 
				 ## 🤖 AI 智能分析
			
@@ -2846,7 +3092,7 @@ AI 分析功能**不是**直接查询网络实时数据，而是分析你**本
 
				 
			
 
				 #### 使用说明：
			
 
				 
			
 
				-1. **项目自带测试数据**：`output` 目录默认包含 **2025年11月1日～11月15日** 的新闻数据，可用于快速体验 AI 功能
			
 
				+1. **项目自带测试数据**：`output` 目录默认包含 **2025-11-01～2025-11-15** 的新闻数据，可用于快速体验 AI 功能
			
 
				 
			
 
				 2. **查询限制**：
			
 
				    - ✅ 只能查询已有日期范围内的数据（11月1-15日）
			
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -1,12 +1,60 @@
 
				 app:
			
 
				   version_check_url: "https://raw.githubusercontent.com/sansan0/TrendRadar/refs/heads/master/version"
			
 
				   show_version_update: true # 控制显示版本更新提示，如果 false，则不接受新版本提示
			
 
				+  # 时区配置（影响所有时间显示、推送窗口判断、数据存储）
			
 
				+  # 常用时区：
			
 
				+  #   - Asia/Shanghai (北京时间 UTC+8)
			
 
				+  #   - America/New_York (美东时间 UTC-5/-4)
			
 
				+  #   - Europe/London (伦敦时间 UTC+0/+1)
			
 
				+  # 完整时区列表: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
			
 
				+  timezone: "Asia/Shanghai"
			
 
				+
			
 
				+# 存储配置
			
 
				+storage:
			
 
				+  # 存储后端选择: local / remote / auto
			
 
				+  # - local: 本地 SQLite + TXT/HTML 文件
			
 
				+  # - remote: 远程云存储（S3 兼容协议，支持 R2/OSS/COS 等）
			
 
				+  # - auto: 自动选择（GitHub Actions 环境且配置了远程存储则用 remote，否则用 local）
			
 
				+  backend: "auto"
			
 
				+
			
 
				+  # 数据格式选项
			
 
				+  formats:
			
 
				+    sqlite: true       # 主存储（必须启用）
			
 
				+    txt: false         # 是否生成 TXT 快照
			
 
				+    html: false        # 是否生成 HTML 报告
			
 
				+
			
 
				+  # 本地存储配置
			
 
				+  local:
			
 
				+    data_dir: "output"        # 数据目录
			
 
				+    retention_days: 0         # 本地数据保留天数（0 = 不清理）
			
 
				+
			
 
				+  # 远程存储配置（S3 兼容协议）
			
 
				+  # 支持: Cloudflare R2, 阿里云 OSS, 腾讯云 COS, AWS S3, MinIO 等
			
 
				+  # 建议将敏感信息配置在 GitHub Secrets 或环境变量中
			
 
				+  remote:
			
 
				+    # 数据保留天数（0 = 不清理远程数据）
			
 
				+    retention_days: 0
			
 
				+    # S3 兼容配置
			
 
				+    endpoint_url: ""          # 服务端点（或环境变量 S3_ENDPOINT_URL）
			
 
				+                              # Cloudflare R2: https://<account_id>.r2.cloudflarestorage.com
			
 
				+                              # 阿里云 OSS: https://oss-cn-hangzhou.aliyuncs.com
			
 
				+                              # 腾讯云 COS: https://cos.ap-guangzhou.myqcloud.com
			
 
				+    bucket_name: ""           # 存储桶名称（或环境变量 S3_BUCKET_NAME）
			
 
				+    access_key_id: ""         # 访问密钥 ID（或环境变量 S3_ACCESS_KEY_ID）
			
 
				+    secret_access_key: ""     # 访问密钥（或环境变量 S3_SECRET_ACCESS_KEY）
			
 
				+    region: ""                # 区域（可选，部分服务商需要，或环境变量 S3_REGION）
			
 
				+
			
 
				+  # 数据拉取配置（从远程同步到本地）
			
 
				+  # 用于 MCP Server 等场景：爬虫存到远程，MCP 拉取到本地分析
			
 
				+  pull:
			
 
				+    enabled: false            # 是否启用启动时自动拉取
			
 
				+    days: 7                   # 拉取最近 N 天的数据（0 = 不拉取）
			
 
				 
			
 
				 crawler:
			
 
				   request_interval: 1000 # 请求间隔(毫秒)
			
 
				   enable_crawler: true # 是否启用爬取新闻功能，如果 false，则直接停止程序
			
 
				   use_proxy: false # 是否启用代理，false 时为关闭
			
 
				-  default_proxy: "http://127.0.0.1:10086"
			
 
				+  default_proxy: "http://127.0.0.1:10801"
			
 
				 
			
 
				 # 🔸 daily（当日汇总模式）
			
 
				 #   • 推送时机：按时推送(默认每小时推送一次)
			
@@ -55,7 +103,6 @@ notification:
 
				       start: "20:00"  # 推送时间窗口开始（北京时间）
			
 
				       end: "22:00"    # 推送时间窗口结束（北京时间）
			
 
				     once_per_day: true  # 每天在时间窗口内只推送一次，如果 false，则窗口内每次执行都推送
			
 
				-    push_record_retention_days: 7  # 推送记录保留天数
			
 
				 
			
 
				   # ⚠️⚠️⚠️ 重要安全警告 / IMPORTANT SECURITY WARNING ⚠️⚠️⚠️
			
 
				   #
			
--- a/docker/.env
+++ b/docker/.env
@@ -40,8 +40,6 @@ PUSH_WINDOW_START=
 
				 PUSH_WINDOW_END=
			
 
				 # 每天只推送一次 (true/false)
			
 
				 PUSH_WINDOW_ONCE_PER_DAY=
			
 
				-# 推送记录保留天数 (数字，如 7)
			
 
				-PUSH_WINDOW_RETENTION_DAYS=
			
 
				 
			
 
				 # ============================================
			
 
				 # 多账号配置
			
@@ -87,6 +85,39 @@ BARK_URL=
 
				 # Slack 推送配置（多账号用 ; 分隔）
			
 
				 SLACK_WEBHOOK_URL=
			
 
				 
			
 
				+# ============================================
			
 
				+# 存储配置
			
 
				+# ============================================
			
 
				+
			
 
				+# 存储后端选择 (local/remote/auto)
			
 
				+# - local: 本地 SQLite + TXT/HTML 文件
			
 
				+# - remote: 远程云存储（S3 兼容协议）
			
 
				+# - auto: 自动选择（GitHub Actions 用 remote，其他用 local）
			
 
				+STORAGE_BACKEND=auto
			
 
				+
			
 
				+# 本地数据保留天数（0 = 无限制，不清理历史数据）
			
 
				+LOCAL_RETENTION_DAYS=0
			
 
				+
			
 
				+# 远程数据保留天数（0 = 无限制，不清理历史数据）
			
 
				+REMOTE_RETENTION_DAYS=0
			
 
				+
			
 
				+# 是否生成 TXT 快照 (true/false)
			
 
				+STORAGE_TXT_ENABLED=
			
 
				+
			
 
				+# 是否生成 HTML 报告 (true/false)
			
 
				+STORAGE_HTML_ENABLED=
			
 
				+
			
 
				+# 远程存储配置（S3 兼容协议，支持 R2/OSS/COS/S3 等）
			
 
				+S3_ENDPOINT_URL=
			
 
				+S3_BUCKET_NAME=
			
 
				+S3_ACCESS_KEY_ID=
			
 
				+S3_SECRET_ACCESS_KEY=
			
 
				+S3_REGION=
			
 
				+
			
 
				+# 数据拉取配置（从远程同步到本地）
			
 
				+PULL_ENABLED=false
			
 
				+PULL_DAYS=7
			
 
				+
			
 
				 # ============================================
			
 
				 # 运行配置
			
 
				 # ============================================
			
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -53,8 +53,8 @@ RUN set -ex && \
 
				 COPY requirements.txt .
			
 
				 RUN pip install --no-cache-dir -r requirements.txt
			
 
				 
			
 
				-COPY main.py .
			
 
				 COPY docker/manage.py .
			
 
				+COPY trendradar/ ./trendradar/
			
 
				 
			
 
				 # 复制 entrypoint.sh 并强制转换为 LF 格式
			
 
				 COPY docker/entrypoint.sh /entrypoint.sh.tmp
			
--- a/docker/Dockerfile.mcp
+++ b/docker/Dockerfile.mcp
@@ -8,6 +8,8 @@ RUN pip install --no-cache-dir -r requirements.txt
 
				 
			
 
				 # 复制 MCP 服务器代码
			
 
				 COPY mcp_server/ ./mcp_server/
			
 
				+# 复制 trendradar 模块（MCP 服务需要读取 SQLite 数据）
			
 
				+COPY trendradar/ ./trendradar/
			
 
				 
			
 
				 # 创建必要目录
			
 
				 RUN mkdir -p /app/config /app/output
			
--- a/docker/docker-compose-build.yml
+++ b/docker/docker-compose-build.yml
@@ -32,7 +32,6 @@ services:
 
				       - PUSH_WINDOW_START=${PUSH_WINDOW_START:-}
			
 
				       - PUSH_WINDOW_END=${PUSH_WINDOW_END:-}
			
 
				       - PUSH_WINDOW_ONCE_PER_DAY=${PUSH_WINDOW_ONCE_PER_DAY:-}
			
 
				-      - PUSH_WINDOW_RETENTION_DAYS=${PUSH_WINDOW_RETENTION_DAYS:-}
			
 
				       # 通知渠道
			
 
				       - FEISHU_WEBHOOK_URL=${FEISHU_WEBHOOK_URL:-}
			
 
				       - TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN:-}
			
@@ -54,6 +53,21 @@ services:
 
				       - BARK_URL=${BARK_URL:-}
			
 
				       # Slack配置
			
 
				       - SLACK_WEBHOOK_URL=${SLACK_WEBHOOK_URL:-}
			
 
				+      # 存储配置
			
 
				+      - STORAGE_BACKEND=${STORAGE_BACKEND:-auto}
			
 
				+      - LOCAL_RETENTION_DAYS=${LOCAL_RETENTION_DAYS:-0}
			
 
				+      - REMOTE_RETENTION_DAYS=${REMOTE_RETENTION_DAYS:-0}
			
 
				+      - STORAGE_TXT_ENABLED=${STORAGE_TXT_ENABLED:-true}
			
 
				+      - STORAGE_HTML_ENABLED=${STORAGE_HTML_ENABLED:-true}
			
 
				+      # 远程存储配置（S3 兼容协议）
			
 
				+      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-}
			
 
				+      - S3_BUCKET_NAME=${S3_BUCKET_NAME:-}
			
 
				+      - S3_ACCESS_KEY_ID=${S3_ACCESS_KEY_ID:-}
			
 
				+      - S3_SECRET_ACCESS_KEY=${S3_SECRET_ACCESS_KEY:-}
			
 
				+      - S3_REGION=${S3_REGION:-}
			
 
				+      # 数据拉取配置
			
 
				+      - PULL_ENABLED=${PULL_ENABLED:-false}
			
 
				+      - PULL_DAYS=${PULL_DAYS:-7}
			
 
				       # 运行模式
			
 
				       - CRON_SCHEDULE=${CRON_SCHEDULE:-*/5 * * * *}
			
 
				       - RUN_MODE=${RUN_MODE:-cron}
			
@@ -71,7 +85,7 @@ services:
 
				 
			
 
				     volumes:
			
 
				       - ../config:/app/config:ro
			
 
				-      - ../output:/app/output:ro
			
 
				+      - ../output:/app/output
			
 
				 
			
 
				     environment:
			
 
				       - TZ=Asia/Shanghai
			
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -30,7 +30,6 @@ services:
 
				       - PUSH_WINDOW_START=${PUSH_WINDOW_START:-}
			
 
				       - PUSH_WINDOW_END=${PUSH_WINDOW_END:-}
			
 
				       - PUSH_WINDOW_ONCE_PER_DAY=${PUSH_WINDOW_ONCE_PER_DAY:-}
			
 
				-      - PUSH_WINDOW_RETENTION_DAYS=${PUSH_WINDOW_RETENTION_DAYS:-}
			
 
				       # 通知渠道
			
 
				       - FEISHU_WEBHOOK_URL=${FEISHU_WEBHOOK_URL:-}
			
 
				       - TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN:-}
			
@@ -52,6 +51,21 @@ services:
 
				       - BARK_URL=${BARK_URL:-}
			
 
				       # Slack配置
			
 
				       - SLACK_WEBHOOK_URL=${SLACK_WEBHOOK_URL:-}
			
 
				+      # 存储配置
			
 
				+      - STORAGE_BACKEND=${STORAGE_BACKEND:-auto}
			
 
				+      - LOCAL_RETENTION_DAYS=${LOCAL_RETENTION_DAYS:-0}
			
 
				+      - REMOTE_RETENTION_DAYS=${REMOTE_RETENTION_DAYS:-0}
			
 
				+      - STORAGE_TXT_ENABLED=${STORAGE_TXT_ENABLED:-true}
			
 
				+      - STORAGE_HTML_ENABLED=${STORAGE_HTML_ENABLED:-true}
			
 
				+      # 远程存储配置（S3 兼容协议）
			
 
				+      - S3_ENDPOINT_URL=${S3_ENDPOINT_URL:-}
			
 
				+      - S3_BUCKET_NAME=${S3_BUCKET_NAME:-}
			
 
				+      - S3_ACCESS_KEY_ID=${S3_ACCESS_KEY_ID:-}
			
 
				+      - S3_SECRET_ACCESS_KEY=${S3_SECRET_ACCESS_KEY:-}
			
 
				+      - S3_REGION=${S3_REGION:-}
			
 
				+      # 数据拉取配置
			
 
				+      - PULL_ENABLED=${PULL_ENABLED:-false}
			
 
				+      - PULL_DAYS=${PULL_DAYS:-7}
			
 
				       # 运行模式
			
 
				       - CRON_SCHEDULE=${CRON_SCHEDULE:-*/5 * * * *}
			
 
				       - RUN_MODE=${RUN_MODE:-cron}
			
@@ -67,7 +81,7 @@ services:
 
				 
			
 
				     volumes:
			
 
				       - ../config:/app/config:ro
			
 
				-      - ../output:/app/output:ro
			
 
				+      - ../output:/app/output
			
 
				 
			
 
				     environment:
			
 
				       - TZ=Asia/Shanghai
			
--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@@ -13,11 +13,11 @@ env >> /etc/environment
 
				 case "${RUN_MODE:-cron}" in
			
 
				 "once")
			
 
				     echo "🔄 单次执行"
			
 
				-    exec /usr/local/bin/python main.py
			
 
				+    exec /usr/local/bin/python -m trendradar
			
 
				     ;;
			
 
				 "cron")
			
 
				     # 生成 crontab
			
 
				-    echo "${CRON_SCHEDULE:-*/30 * * * *} cd /app && /usr/local/bin/python main.py" > /tmp/crontab
			
 
				+    echo "${CRON_SCHEDULE:-*/30 * * * *} cd /app && /usr/local/bin/python -m trendradar" > /tmp/crontab
			
 
				     
			
 
				     echo "📅 生成的crontab内容:"
			
 
				     cat /tmp/crontab
			
@@ -30,7 +30,7 @@ case "${RUN_MODE:-cron}" in
 
				     # 立即执行一次（如果配置了）
			
 
				     if [ "${IMMEDIATE_RUN:-false}" = "true" ]; then
			
 
				         echo "▶️ 立即执行一次"
			
 
				-        /usr/local/bin/python main.py
			
 
				+        /usr/local/bin/python -m trendradar
			
 
				     fi
			
 
				 
			
 
				     # 启动 Web 服务器（如果配置了）
			
--- a/docker/manage.py
+++ b/docker/manage.py
@@ -33,7 +33,7 @@ def manual_run():
 
				     print("🔄 手动执行爬虫...")
			
 
				     try:
			
 
				         result = subprocess.run(
			
 
				-            ["python", "main.py"], cwd="/app", capture_output=False, text=True
			
 
				+            ["python", "-m", "trendradar"], cwd="/app", capture_output=False, text=True
			
 
				         )
			
 
				         if result.returncode == 0:
			
 
				             print("✅ 执行完成")
			
@@ -285,12 +285,24 @@ def show_config():
 
				         "TELEGRAM_CHAT_ID",
			
 
				         "CONFIG_PATH",
			
 
				         "FREQUENCY_WORDS_PATH",
			
 
				+        # 存储配置
			
 
				+        "STORAGE_BACKEND",
			
 
				+        "LOCAL_RETENTION_DAYS",
			
 
				+        "REMOTE_RETENTION_DAYS",
			
 
				+        "STORAGE_TXT_ENABLED",
			
 
				+        "STORAGE_HTML_ENABLED",
			
 
				+        "S3_BUCKET_NAME",
			
 
				+        "S3_ACCESS_KEY_ID",
			
 
				+        "S3_ENDPOINT_URL",
			
 
				+        "S3_REGION",
			
 
				+        "PULL_ENABLED",
			
 
				+        "PULL_DAYS",
			
 
				     ]
			
 
				 
			
 
				     for var in env_vars:
			
 
				         value = os.environ.get(var, "未设置")
			
 
				         # 隐藏敏感信息
			
 
				-        if any(sensitive in var for sensitive in ["WEBHOOK", "TOKEN", "KEY"]):
			
 
				+        if any(sensitive in var for sensitive in ["WEBHOOK", "TOKEN", "KEY", "SECRET"]):
			
 
				             if value and value != "未设置":
			
 
				                 masked_value = value[:10] + "***" if len(value) > 10 else "***"
			
 
				                 print(f"  {var}: {masked_value}")
			
@@ -331,6 +343,17 @@ def show_files():
 
				     # 显示最近2天的文件
			
 
				     for date_dir in date_dirs[:2]:
			
 
				         print(f"  📅 {date_dir.name}:")
			
 
				+
			
 
				+        # 检查 SQLite 数据库文件
			
 
				+        db_files = list(date_dir.glob("*.db"))
			
 
				+        if db_files:
			
 
				+            print(f"    💾 SQLite: {len(db_files)} 个数据库")
			
 
				+            for db_file in db_files[:3]:
			
 
				+                mtime = time.ctime(db_file.stat().st_mtime)
			
 
				+                size_kb = db_file.stat().st_size // 1024
			
 
				+                print(f"      📀 {db_file.name} ({size_kb}KB, {mtime.split()[3][:5]})")
			
 
				+
			
 
				+        # 检查子目录（html, txt）
			
 
				         for subdir in ["html", "txt"]:
			
 
				             sub_path = date_dir / subdir
			
 
				             if sub_path.exists():
			
--- a/main.py
+++ b/main.py
@@ -1,5431 +0,0 @@
 
				-# coding=utf-8
			
 
				-
			
 
				-import json
			
 
				-import os
			
 
				-import random
			
 
				-import re
			
 
				-import time
			
 
				-import webbrowser
			
 
				-import smtplib
			
 
				-from email.mime.text import MIMEText
			
 
				-from email.mime.multipart import MIMEMultipart
			
 
				-from email.header import Header
			
 
				-from email.utils import formataddr, formatdate, make_msgid
			
 
				-from datetime import datetime
			
 
				-from pathlib import Path
			
 
				-from typing import Dict, List, Tuple, Optional, Union
			
 
				-
			
 
				-import pytz
			
 
				-import requests
			
 
				-import yaml
			
 
				-
			
 
				-
			
 
				-VERSION = "3.5.0"
			
 
				-
			
 
				-
			
 
				-# === SMTP邮件配置 ===
			
 
				-SMTP_CONFIGS = {
			
 
				-    # Gmail（使用 STARTTLS）
			
 
				-    "gmail.com": {"server": "smtp.gmail.com", "port": 587, "encryption": "TLS"},
			
 
				-    # QQ邮箱（使用 SSL，更稳定）
			
 
				-    "qq.com": {"server": "smtp.qq.com", "port": 465, "encryption": "SSL"},
			
 
				-    # Outlook（使用 STARTTLS）
			
 
				-    "outlook.com": {
			
 
				-        "server": "smtp-mail.outlook.com",
			
 
				-        "port": 587,
			
 
				-        "encryption": "TLS",
			
 
				-    },
			
 
				-    "hotmail.com": {
			
 
				-        "server": "smtp-mail.outlook.com",
			
 
				-        "port": 587,
			
 
				-        "encryption": "TLS",
			
 
				-    },
			
 
				-    "live.com": {"server": "smtp-mail.outlook.com", "port": 587, "encryption": "TLS"},
			
 
				-    # 网易邮箱（使用 SSL，更稳定）
			
 
				-    "163.com": {"server": "smtp.163.com", "port": 465, "encryption": "SSL"},
			
 
				-    "126.com": {"server": "smtp.126.com", "port": 465, "encryption": "SSL"},
			
 
				-    # 新浪邮箱（使用 SSL）
			
 
				-    "sina.com": {"server": "smtp.sina.com", "port": 465, "encryption": "SSL"},
			
 
				-    # 搜狐邮箱（使用 SSL）
			
 
				-    "sohu.com": {"server": "smtp.sohu.com", "port": 465, "encryption": "SSL"},
			
 
				-    # 天翼邮箱（使用 SSL）
			
 
				-    "189.cn": {"server": "smtp.189.cn", "port": 465, "encryption": "SSL"},
			
 
				-    # 阿里云邮箱（使用 TLS）
			
 
				-    "aliyun.com": {"server": "smtp.aliyun.com", "port": 465, "encryption": "TLS"},
			
 
				-}
			
 
				-
			
 
				-
			
 
				-# === 多账号推送工具函数 ===
			
 
				-def parse_multi_account_config(config_value: str, separator: str = ";") -> List[str]:
			
 
				-    """
			
 
				-    解析多账号配置，返回账号列表
			
 
				-
			
 
				-    Args:
			
 
				-        config_value: 配置值字符串，多个账号用分隔符分隔
			
 
				-        separator: 分隔符，默认为 ;
			
 
				-
			
 
				-    Returns:
			
 
				-        账号列表，空字符串会被保留（用于占位）
			
 
				-    """
			
 
				-    if not config_value:
			
 
				-        return []
			
 
				-    # 保留空字符串用于占位（如 ";token2" 表示第一个账号无token）
			
 
				-    accounts = [acc.strip() for acc in config_value.split(separator)]
			
 
				-    # 过滤掉全部为空的情况
			
 
				-    if all(not acc for acc in accounts):
			
 
				-        return []
			
 
				-    return accounts
			
 
				-
			
 
				-
			
 
				-def validate_paired_configs(
			
 
				-    configs: Dict[str, List[str]],
			
 
				-    channel_name: str,
			
 
				-    required_keys: Optional[List[str]] = None
			
 
				-) -> Tuple[bool, int]:
			
 
				-    """
			
 
				-    验证配对配置的数量是否一致
			
 
				-
			
 
				-    Args:
			
 
				-        configs: 配置字典，key 为配置名，value 为账号列表
			
 
				-        channel_name: 渠道名称，用于日志输出
			
 
				-        required_keys: 必须有值的配置项列表
			
 
				-
			
 
				-    Returns:
			
 
				-        (是否验证通过, 账号数量)
			
 
				-    """
			
 
				-    # 过滤掉空列表
			
 
				-    non_empty_configs = {k: v for k, v in configs.items() if v}
			
 
				-
			
 
				-    if not non_empty_configs:
			
 
				-        return True, 0
			
 
				-
			
 
				-    # 检查必须项
			
 
				-    if required_keys:
			
 
				-        for key in required_keys:
			
 
				-            if key not in non_empty_configs or not non_empty_configs[key]:
			
 
				-                return True, 0  # 必须项为空，视为未配置
			
 
				-
			
 
				-    # 获取所有非空配置的长度
			
 
				-    lengths = {k: len(v) for k, v in non_empty_configs.items()}
			
 
				-    unique_lengths = set(lengths.values())
			
 
				-
			
 
				-    if len(unique_lengths) > 1:
			
 
				-        print(f"❌ {channel_name} 配置错误：配对配置数量不一致，将跳过该渠道推送")
			
 
				-        for key, length in lengths.items():
			
 
				-            print(f"   - {key}: {length} 个")
			
 
				-        return False, 0
			
 
				-
			
 
				-    return True, list(unique_lengths)[0] if unique_lengths else 0
			
 
				-
			
 
				-
			
 
				-def limit_accounts(
			
 
				-    accounts: List[str],
			
 
				-    max_count: int,
			
 
				-    channel_name: str
			
 
				-) -> List[str]:
			
 
				-    """
			
 
				-    限制账号数量
			
 
				-
			
 
				-    Args:
			
 
				-        accounts: 账号列表
			
 
				-        max_count: 最大账号数量
			
 
				-        channel_name: 渠道名称，用于日志输出
			
 
				-
			
 
				-    Returns:
			
 
				-        限制后的账号列表
			
 
				-    """
			
 
				-    if len(accounts) > max_count:
			
 
				-        print(f"⚠️ {channel_name} 配置了 {len(accounts)} 个账号，超过最大限制 {max_count}，只使用前 {max_count} 个")
			
 
				-        print(f"   ⚠️ 警告：如果您是 fork 用户，过多账号可能导致 GitHub Actions 运行时间过长，存在账号风险")
			
 
				-        return accounts[:max_count]
			
 
				-    return accounts
			
 
				-
			
 
				-
			
 
				-def get_account_at_index(accounts: List[str], index: int, default: str = "") -> str:
			
 
				-    """
			
 
				-    安全获取指定索引的账号值
			
 
				-
			
 
				-    Args:
			
 
				-        accounts: 账号列表
			
 
				-        index: 索引
			
 
				-        default: 默认值
			
 
				-
			
 
				-    Returns:
			
 
				-        账号值或默认值
			
 
				-    """
			
 
				-    if index < len(accounts):
			
 
				-        return accounts[index] if accounts[index] else default
			
 
				-    return default
			
 
				-
			
 
				-
			
 
				-# === 配置管理 ===
			
 
				-def load_config():
			
 
				-    """加载配置文件"""
			
 
				-    config_path = os.environ.get("CONFIG_PATH", "config/config.yaml")
			
 
				-
			
 
				-    if not Path(config_path).exists():
			
 
				-        raise FileNotFoundError(f"配置文件 {config_path} 不存在")
			
 
				-
			
 
				-    with open(config_path, "r", encoding="utf-8") as f:
			
 
				-        config_data = yaml.safe_load(f)
			
 
				-
			
 
				-    print(f"配置文件加载成功: {config_path}")
			
 
				-
			
 
				-    # 构建配置
			
 
				-    config = {
			
 
				-        "VERSION_CHECK_URL": config_data["app"]["version_check_url"],
			
 
				-        "SHOW_VERSION_UPDATE": config_data["app"]["show_version_update"],
			
 
				-        "REQUEST_INTERVAL": config_data["crawler"]["request_interval"],
			
 
				-        "REPORT_MODE": os.environ.get("REPORT_MODE", "").strip()
			
 
				-        or config_data["report"]["mode"],
			
 
				-        "RANK_THRESHOLD": config_data["report"]["rank_threshold"],
			
 
				-        "SORT_BY_POSITION_FIRST": os.environ.get("SORT_BY_POSITION_FIRST", "").strip().lower()
			
 
				-        in ("true", "1")
			
 
				-        if os.environ.get("SORT_BY_POSITION_FIRST", "").strip()
			
 
				-        else config_data["report"].get("sort_by_position_first", False),
			
 
				-        "MAX_NEWS_PER_KEYWORD": int(
			
 
				-            os.environ.get("MAX_NEWS_PER_KEYWORD", "").strip() or "0"
			
 
				-        )
			
 
				-        or config_data["report"].get("max_news_per_keyword", 0),
			
 
				-        "REVERSE_CONTENT_ORDER": os.environ.get("REVERSE_CONTENT_ORDER", "").strip().lower()
			
 
				-        in ("true", "1")
			
 
				-        if os.environ.get("REVERSE_CONTENT_ORDER", "").strip()
			
 
				-        else config_data["report"].get("reverse_content_order", False),
			
 
				-        "USE_PROXY": config_data["crawler"]["use_proxy"],
			
 
				-        "DEFAULT_PROXY": config_data["crawler"]["default_proxy"],
			
 
				-        "ENABLE_CRAWLER": os.environ.get("ENABLE_CRAWLER", "").strip().lower()
			
 
				-        in ("true", "1")
			
 
				-        if os.environ.get("ENABLE_CRAWLER", "").strip()
			
 
				-        else config_data["crawler"]["enable_crawler"],
			
 
				-        "ENABLE_NOTIFICATION": os.environ.get("ENABLE_NOTIFICATION", "").strip().lower()
			
 
				-        in ("true", "1")
			
 
				-        if os.environ.get("ENABLE_NOTIFICATION", "").strip()
			
 
				-        else config_data["notification"]["enable_notification"],
			
 
				-        "MESSAGE_BATCH_SIZE": config_data["notification"]["message_batch_size"],
			
 
				-        "DINGTALK_BATCH_SIZE": config_data["notification"].get(
			
 
				-            "dingtalk_batch_size", 20000
			
 
				-        ),
			
 
				-        "FEISHU_BATCH_SIZE": config_data["notification"].get("feishu_batch_size", 29000),
			
 
				-        "BARK_BATCH_SIZE": config_data["notification"].get("bark_batch_size", 3600),
			
 
				-        "SLACK_BATCH_SIZE": config_data["notification"].get("slack_batch_size", 4000),
			
 
				-        "BATCH_SEND_INTERVAL": config_data["notification"]["batch_send_interval"],
			
 
				-        "FEISHU_MESSAGE_SEPARATOR": config_data["notification"][
			
 
				-            "feishu_message_separator"
			
 
				-        ],
			
 
				-        # 多账号配置
			
 
				-        "MAX_ACCOUNTS_PER_CHANNEL": int(
			
 
				-            os.environ.get("MAX_ACCOUNTS_PER_CHANNEL", "").strip() or "0"
			
 
				-        )
			
 
				-        or config_data["notification"].get("max_accounts_per_channel", 3),
			
 
				-        "PUSH_WINDOW": {
			
 
				-            "ENABLED": os.environ.get("PUSH_WINDOW_ENABLED", "").strip().lower()
			
 
				-            in ("true", "1")
			
 
				-            if os.environ.get("PUSH_WINDOW_ENABLED", "").strip()
			
 
				-            else config_data["notification"]
			
 
				-            .get("push_window", {})
			
 
				-            .get("enabled", False),
			
 
				-            "TIME_RANGE": {
			
 
				-                "START": os.environ.get("PUSH_WINDOW_START", "").strip()
			
 
				-                or config_data["notification"]
			
 
				-                .get("push_window", {})
			
 
				-                .get("time_range", {})
			
 
				-                .get("start", "08:00"),
			
 
				-                "END": os.environ.get("PUSH_WINDOW_END", "").strip()
			
 
				-                or config_data["notification"]
			
 
				-                .get("push_window", {})
			
 
				-                .get("time_range", {})
			
 
				-                .get("end", "22:00"),
			
 
				-            },
			
 
				-            "ONCE_PER_DAY": os.environ.get("PUSH_WINDOW_ONCE_PER_DAY", "").strip().lower()
			
 
				-            in ("true", "1")
			
 
				-            if os.environ.get("PUSH_WINDOW_ONCE_PER_DAY", "").strip()
			
 
				-            else config_data["notification"]
			
 
				-            .get("push_window", {})
			
 
				-            .get("once_per_day", True),
			
 
				-            "RECORD_RETENTION_DAYS": int(
			
 
				-                os.environ.get("PUSH_WINDOW_RETENTION_DAYS", "").strip() or "0"
			
 
				-            )
			
 
				-            or config_data["notification"]
			
 
				-            .get("push_window", {})
			
 
				-            .get("push_record_retention_days", 7),
			
 
				-        },
			
 
				-        "WEIGHT_CONFIG": {
			
 
				-            "RANK_WEIGHT": config_data["weight"]["rank_weight"],
			
 
				-            "FREQUENCY_WEIGHT": config_data["weight"]["frequency_weight"],
			
 
				-            "HOTNESS_WEIGHT": config_data["weight"]["hotness_weight"],
			
 
				-        },
			
 
				-        "PLATFORMS": config_data["platforms"],
			
 
				-    }
			
 
				-
			
 
				-    # 通知渠道配置（环境变量优先）
			
 
				-    notification = config_data.get("notification", {})
			
 
				-    webhooks = notification.get("webhooks", {})
			
 
				-
			
 
				-    config["FEISHU_WEBHOOK_URL"] = os.environ.get(
			
 
				-        "FEISHU_WEBHOOK_URL", ""
			
 
				-    ).strip() or webhooks.get("feishu_url", "")
			
 
				-    config["DINGTALK_WEBHOOK_URL"] = os.environ.get(
			
 
				-        "DINGTALK_WEBHOOK_URL", ""
			
 
				-    ).strip() or webhooks.get("dingtalk_url", "")
			
 
				-    config["WEWORK_WEBHOOK_URL"] = os.environ.get(
			
 
				-        "WEWORK_WEBHOOK_URL", ""
			
 
				-    ).strip() or webhooks.get("wework_url", "")
			
 
				-    config["WEWORK_MSG_TYPE"] = os.environ.get(
			
 
				-        "WEWORK_MSG_TYPE", ""
			
 
				-    ).strip() or webhooks.get("wework_msg_type", "markdown")
			
 
				-    config["TELEGRAM_BOT_TOKEN"] = os.environ.get(
			
 
				-        "TELEGRAM_BOT_TOKEN", ""
			
 
				-    ).strip() or webhooks.get("telegram_bot_token", "")
			
 
				-    config["TELEGRAM_CHAT_ID"] = os.environ.get(
			
 
				-        "TELEGRAM_CHAT_ID", ""
			
 
				-    ).strip() or webhooks.get("telegram_chat_id", "")
			
 
				-
			
 
				-    # 邮件配置
			
 
				-    config["EMAIL_FROM"] = os.environ.get("EMAIL_FROM", "").strip() or webhooks.get(
			
 
				-        "email_from", ""
			
 
				-    )
			
 
				-    config["EMAIL_PASSWORD"] = os.environ.get(
			
 
				-        "EMAIL_PASSWORD", ""
			
 
				-    ).strip() or webhooks.get("email_password", "")
			
 
				-    config["EMAIL_TO"] = os.environ.get("EMAIL_TO", "").strip() or webhooks.get(
			
 
				-        "email_to", ""
			
 
				-    )
			
 
				-    config["EMAIL_SMTP_SERVER"] = os.environ.get(
			
 
				-        "EMAIL_SMTP_SERVER", ""
			
 
				-    ).strip() or webhooks.get("email_smtp_server", "")
			
 
				-    config["EMAIL_SMTP_PORT"] = os.environ.get(
			
 
				-        "EMAIL_SMTP_PORT", ""
			
 
				-    ).strip() or webhooks.get("email_smtp_port", "")
			
 
				-
			
 
				-    # ntfy配置
			
 
				-    config["NTFY_SERVER_URL"] = (
			
 
				-        os.environ.get("NTFY_SERVER_URL", "").strip()
			
 
				-        or webhooks.get("ntfy_server_url")
			
 
				-        or "https://ntfy.sh"
			
 
				-    )
			
 
				-    config["NTFY_TOPIC"] = os.environ.get("NTFY_TOPIC", "").strip() or webhooks.get(
			
 
				-        "ntfy_topic", ""
			
 
				-    )
			
 
				-    config["NTFY_TOKEN"] = os.environ.get("NTFY_TOKEN", "").strip() or webhooks.get(
			
 
				-        "ntfy_token", ""
			
 
				-    )
			
 
				-
			
 
				-    # Bark配置
			
 
				-    config["BARK_URL"] = os.environ.get("BARK_URL", "").strip() or webhooks.get(
			
 
				-        "bark_url", ""
			
 
				-    )
			
 
				-
			
 
				-    # Slack配置
			
 
				-    config["SLACK_WEBHOOK_URL"] = os.environ.get("SLACK_WEBHOOK_URL", "").strip() or webhooks.get(
			
 
				-        "slack_webhook_url", ""
			
 
				-    )
			
 
				-
			
 
				-    # 输出配置来源信息
			
 
				-    notification_sources = []
			
 
				-    max_accounts = config["MAX_ACCOUNTS_PER_CHANNEL"]
			
 
				-
			
 
				-    if config["FEISHU_WEBHOOK_URL"]:
			
 
				-        accounts = parse_multi_account_config(config["FEISHU_WEBHOOK_URL"])
			
 
				-        count = min(len(accounts), max_accounts)
			
 
				-        source = "环境变量" if os.environ.get("FEISHU_WEBHOOK_URL") else "配置文件"
			
 
				-        notification_sources.append(f"飞书({source}, {count}个账号)")
			
 
				-    if config["DINGTALK_WEBHOOK_URL"]:
			
 
				-        accounts = parse_multi_account_config(config["DINGTALK_WEBHOOK_URL"])
			
 
				-        count = min(len(accounts), max_accounts)
			
 
				-        source = "环境变量" if os.environ.get("DINGTALK_WEBHOOK_URL") else "配置文件"
			
 
				-        notification_sources.append(f"钉钉({source}, {count}个账号)")
			
 
				-    if config["WEWORK_WEBHOOK_URL"]:
			
 
				-        accounts = parse_multi_account_config(config["WEWORK_WEBHOOK_URL"])
			
 
				-        count = min(len(accounts), max_accounts)
			
 
				-        source = "环境变量" if os.environ.get("WEWORK_WEBHOOK_URL") else "配置文件"
			
 
				-        notification_sources.append(f"企业微信({source}, {count}个账号)")
			
 
				-    if config["TELEGRAM_BOT_TOKEN"] and config["TELEGRAM_CHAT_ID"]:
			
 
				-        tokens = parse_multi_account_config(config["TELEGRAM_BOT_TOKEN"])
			
 
				-        chat_ids = parse_multi_account_config(config["TELEGRAM_CHAT_ID"])
			
 
				-        # 验证数量一致性
			
 
				-        valid, count = validate_paired_configs(
			
 
				-            {"bot_token": tokens, "chat_id": chat_ids},
			
 
				-            "Telegram",
			
 
				-            required_keys=["bot_token", "chat_id"]
			
 
				-        )
			
 
				-        if valid and count > 0:
			
 
				-            count = min(count, max_accounts)
			
 
				-            token_source = "环境变量" if os.environ.get("TELEGRAM_BOT_TOKEN") else "配置文件"
			
 
				-            notification_sources.append(f"Telegram({token_source}, {count}个账号)")
			
 
				-    if config["EMAIL_FROM"] and config["EMAIL_PASSWORD"] and config["EMAIL_TO"]:
			
 
				-        from_source = "环境变量" if os.environ.get("EMAIL_FROM") else "配置文件"
			
 
				-        notification_sources.append(f"邮件({from_source})")
			
 
				-
			
 
				-    if config["NTFY_SERVER_URL"] and config["NTFY_TOPIC"]:
			
 
				-        topics = parse_multi_account_config(config["NTFY_TOPIC"])
			
 
				-        tokens = parse_multi_account_config(config["NTFY_TOKEN"])
			
 
				-        # ntfy 的 token 是可选的，但如果配置了，数量必须与 topic 一致
			
 
				-        if tokens:
			
 
				-            valid, count = validate_paired_configs(
			
 
				-                {"topic": topics, "token": tokens},
			
 
				-                "ntfy"
			
 
				-            )
			
 
				-            if valid and count > 0:
			
 
				-                count = min(count, max_accounts)
			
 
				-                server_source = "环境变量" if os.environ.get("NTFY_SERVER_URL") else "配置文件"
			
 
				-                notification_sources.append(f"ntfy({server_source}, {count}个账号)")
			
 
				-        else:
			
 
				-            count = min(len(topics), max_accounts)
			
 
				-            server_source = "环境变量" if os.environ.get("NTFY_SERVER_URL") else "配置文件"
			
 
				-            notification_sources.append(f"ntfy({server_source}, {count}个账号)")
			
 
				-
			
 
				-    if config["BARK_URL"]:
			
 
				-        accounts = parse_multi_account_config(config["BARK_URL"])
			
 
				-        count = min(len(accounts), max_accounts)
			
 
				-        bark_source = "环境变量" if os.environ.get("BARK_URL") else "配置文件"
			
 
				-        notification_sources.append(f"Bark({bark_source}, {count}个账号)")
			
 
				-
			
 
				-    if config["SLACK_WEBHOOK_URL"]:
			
 
				-        accounts = parse_multi_account_config(config["SLACK_WEBHOOK_URL"])
			
 
				-        count = min(len(accounts), max_accounts)
			
 
				-        slack_source = "环境变量" if os.environ.get("SLACK_WEBHOOK_URL") else "配置文件"
			
 
				-        notification_sources.append(f"Slack({slack_source}, {count}个账号)")
			
 
				-
			
 
				-    if notification_sources:
			
 
				-        print(f"通知渠道配置来源: {', '.join(notification_sources)}")
			
 
				-        print(f"每个渠道最大账号数: {max_accounts}")
			
 
				-    else:
			
 
				-        print("未配置任何通知渠道")
			
 
				-
			
 
				-    return config
			
 
				-
			
 
				-
			
 
				-print("正在加载配置...")
			
 
				-CONFIG = load_config()
			
 
				-print(f"TrendRadar v{VERSION} 配置加载完成")
			
 
				-print(f"监控平台数量: {len(CONFIG['PLATFORMS'])}")
			
 
				-
			
 
				-
			
 
				-# === 工具函数 ===
			
 
				-def get_beijing_time():
			
 
				-    """获取北京时间"""
			
 
				-    return datetime.now(pytz.timezone("Asia/Shanghai"))
			
 
				-
			
 
				-
			
 
				-def format_date_folder():
			
 
				-    """格式化日期文件夹"""
			
 
				-    return get_beijing_time().strftime("%Y年%m月%d日")
			
 
				-
			
 
				-
			
 
				-def format_time_filename():
			
 
				-    """格式化时间文件名"""
			
 
				-    return get_beijing_time().strftime("%H时%M分")
			
 
				-
			
 
				-
			
 
				-def clean_title(title: str) -> str:
			
 
				-    """清理标题中的特殊字符"""
			
 
				-    if not isinstance(title, str):
			
 
				-        title = str(title)
			
 
				-    cleaned_title = title.replace("\n", " ").replace("\r", " ")
			
 
				-    cleaned_title = re.sub(r"\s+", " ", cleaned_title)
			
 
				-    cleaned_title = cleaned_title.strip()
			
 
				-    return cleaned_title
			
 
				-
			
 
				-
			
 
				-def ensure_directory_exists(directory: str):
			
 
				-    """确保目录存在"""
			
 
				-    Path(directory).mkdir(parents=True, exist_ok=True)
			
 
				-
			
 
				-
			
 
				-def get_output_path(subfolder: str, filename: str) -> str:
			
 
				-    """获取输出路径"""
			
 
				-    date_folder = format_date_folder()
			
 
				-    output_dir = Path("output") / date_folder / subfolder
			
 
				-    ensure_directory_exists(str(output_dir))
			
 
				-    return str(output_dir / filename)
			
 
				-
			
 
				-
			
 
				-def check_version_update(
			
 
				-    current_version: str, version_url: str, proxy_url: Optional[str] = None
			
 
				-) -> Tuple[bool, Optional[str]]:
			
 
				-    """检查版本更新"""
			
 
				-    try:
			
 
				-        proxies = None
			
 
				-        if proxy_url:
			
 
				-            proxies = {"http": proxy_url, "https": proxy_url}
			
 
				-
			
 
				-        headers = {
			
 
				-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
			
 
				-            "Accept": "text/plain, */*",
			
 
				-            "Cache-Control": "no-cache",
			
 
				-        }
			
 
				-
			
 
				-        response = requests.get(
			
 
				-            version_url, proxies=proxies, headers=headers, timeout=10
			
 
				-        )
			
 
				-        response.raise_for_status()
			
 
				-
			
 
				-        remote_version = response.text.strip()
			
 
				-        print(f"当前版本: {current_version}, 远程版本: {remote_version}")
			
 
				-
			
 
				-        # 比较版本
			
 
				-        def parse_version(version_str):
			
 
				-            try:
			
 
				-                parts = version_str.strip().split(".")
			
 
				-                if len(parts) != 3:
			
 
				-                    raise ValueError("版本号格式不正确")
			
 
				-                return int(parts[0]), int(parts[1]), int(parts[2])
			
 
				-            except:
			
 
				-                return 0, 0, 0
			
 
				-
			
 
				-        current_tuple = parse_version(current_version)
			
 
				-        remote_tuple = parse_version(remote_version)
			
 
				-
			
 
				-        need_update = current_tuple < remote_tuple
			
 
				-        return need_update, remote_version if need_update else None
			
 
				-
			
 
				-    except Exception as e:
			
 
				-        print(f"版本检查失败: {e}")
			
 
				-        return False, None
			
 
				-
			
 
				-
			
 
				-def is_first_crawl_today() -> bool:
			
 
				-    """检测是否是当天第一次爬取"""
			
 
				-    date_folder = format_date_folder()
			
 
				-    txt_dir = Path("output") / date_folder / "txt"
			
 
				-
			
 
				-    if not txt_dir.exists():
			
 
				-        return True
			
 
				-
			
 
				-    files = sorted([f for f in txt_dir.iterdir() if f.suffix == ".txt"])
			
 
				-    return len(files) <= 1
			
 
				-
			
 
				-
			
 
				-def html_escape(text: str) -> str:
			
 
				-    """HTML转义"""
			
 
				-    if not isinstance(text, str):
			
 
				-        text = str(text)
			
 
				-
			
 
				-    return (
			
 
				-        text.replace("&", "&amp;")
			
 
				-        .replace("<", "&lt;")
			
 
				-        .replace(">", "&gt;")
			
 
				-        .replace('"', "&quot;")
			
 
				-        .replace("'", "&#x27;")
			
 
				-    )
			
 
				-
			
 
				-
			
 
				-# === 推送记录管理 ===
			
 
				-class PushRecordManager:
			
 
				-    """推送记录管理器"""
			
 
				-
			
 
				-    def __init__(self):
			
 
				-        self.record_dir = Path("output") / ".push_records"
			
 
				-        self.ensure_record_dir()
			
 
				-        self.cleanup_old_records()
			
 
				-
			
 
				-    def ensure_record_dir(self):
			
 
				-        """确保记录目录存在"""
			
 
				-        self.record_dir.mkdir(parents=True, exist_ok=True)
			
 
				-
			
 
				-    def get_today_record_file(self) -> Path:
			
 
				-        """获取今天的记录文件路径"""
			
 
				-        today = get_beijing_time().strftime("%Y%m%d")
			
 
				-        return self.record_dir / f"push_record_{today}.json"
			
 
				-
			
 
				-    def cleanup_old_records(self):
			
 
				-        """清理过期的推送记录"""
			
 
				-        retention_days = CONFIG["PUSH_WINDOW"]["RECORD_RETENTION_DAYS"]
			
 
				-        current_time = get_beijing_time()
			
 
				-
			
 
				-        for record_file in self.record_dir.glob("push_record_*.json"):
			
 
				-            try:
			
 
				-                date_str = record_file.stem.replace("push_record_", "")
			
 
				-                file_date = datetime.strptime(date_str, "%Y%m%d")
			
 
				-                file_date = pytz.timezone("Asia/Shanghai").localize(file_date)
			
 
				-
			
 
				-                if (current_time - file_date).days > retention_days:
			
 
				-                    record_file.unlink()
			
 
				-                    print(f"清理过期推送记录: {record_file.name}")
			
 
				-            except Exception as e:
			
 
				-                print(f"清理记录文件失败 {record_file}: {e}")
			
 
				-
			
 
				-    def has_pushed_today(self) -> bool:
			
 
				-        """检查今天是否已经推送过"""
			
 
				-        record_file = self.get_today_record_file()
			
 
				-
			
 
				-        if not record_file.exists():
			
 
				-            return False
			
 
				-
			
 
				-        try:
			
 
				-            with open(record_file, "r", encoding="utf-8") as f:
			
 
				-                record = json.load(f)
			
 
				-            return record.get("pushed", False)
			
 
				-        except Exception as e:
			
 
				-            print(f"读取推送记录失败: {e}")
			
 
				-            return False
			
 
				-
			
 
				-    def record_push(self, report_type: str):
			
 
				-        """记录推送"""
			
 
				-        record_file = self.get_today_record_file()
			
 
				-        now = get_beijing_time()
			
 
				-
			
 
				-        record = {
			
 
				-            "pushed": True,
			
 
				-            "push_time": now.strftime("%Y-%m-%d %H:%M:%S"),
			
 
				-            "report_type": report_type,
			
 
				-        }
			
 
				-
			
 
				-        try:
			
 
				-            with open(record_file, "w", encoding="utf-8") as f:
			
 
				-                json.dump(record, f, ensure_ascii=False, indent=2)
			
 
				-            print(f"推送记录已保存: {report_type} at {now.strftime('%H:%M:%S')}")
			
 
				-        except Exception as e:
			
 
				-            print(f"保存推送记录失败: {e}")
			
 
				-
			
 
				-    def is_in_time_range(self, start_time: str, end_time: str) -> bool:
			
 
				-        """检查当前时间是否在指定时间范围内"""
			
 
				-        now = get_beijing_time()
			
 
				-        current_time = now.strftime("%H:%M")
			
 
				-    
			
 
				-        def normalize_time(time_str: str) -> str:
			
 
				-            """将时间字符串标准化为 HH:MM 格式"""
			
 
				-            try:
			
 
				-                parts = time_str.strip().split(":")
			
 
				-                if len(parts) != 2:
			
 
				-                    raise ValueError(f"时间格式错误: {time_str}")
			
 
				-            
			
 
				-                hour = int(parts[0])
			
 
				-                minute = int(parts[1])
			
 
				-            
			
 
				-                if not (0 <= hour <= 23 and 0 <= minute <= 59):
			
 
				-                    raise ValueError(f"时间范围错误: {time_str}")
			
 
				-            
			
 
				-                return f"{hour:02d}:{minute:02d}"
			
 
				-            except Exception as e:
			
 
				-                print(f"时间格式化错误 '{time_str}': {e}")
			
 
				-                return time_str
			
 
				-    
			
 
				-        normalized_start = normalize_time(start_time)
			
 
				-        normalized_end = normalize_time(end_time)
			
 
				-        normalized_current = normalize_time(current_time)
			
 
				-    
			
 
				-        result = normalized_start <= normalized_current <= normalized_end
			
 
				-    
			
 
				-        if not result:
			
 
				-            print(f"时间窗口判断：当前 {normalized_current}，窗口 {normalized_start}-{normalized_end}")
			
 
				-    
			
 
				-        return result
			
 
				-
			
 
				-
			
 
				-# === 数据获取 ===
			
 
				-class DataFetcher:
			
 
				-    """数据获取器"""
			
 
				-
			
 
				-    def __init__(self, proxy_url: Optional[str] = None):
			
 
				-        self.proxy_url = proxy_url
			
 
				-
			
 
				-    def fetch_data(
			
 
				-        self,
			
 
				-        id_info: Union[str, Tuple[str, str]],
			
 
				-        max_retries: int = 2,
			
 
				-        min_retry_wait: int = 3,
			
 
				-        max_retry_wait: int = 5,
			
 
				-    ) -> Tuple[Optional[str], str, str]:
			
 
				-        """获取指定ID数据，支持重试"""
			
 
				-        if isinstance(id_info, tuple):
			
 
				-            id_value, alias = id_info
			
 
				-        else:
			
 
				-            id_value = id_info
			
 
				-            alias = id_value
			
 
				-
			
 
				-        url = f"https://newsnow.busiyi.world/api/s?id={id_value}&latest"
			
 
				-
			
 
				-        proxies = None
			
 
				-        if self.proxy_url:
			
 
				-            proxies = {"http": self.proxy_url, "https": self.proxy_url}
			
 
				-
			
 
				-        headers = {
			
 
				-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
			
 
				-            "Accept": "application/json, text/plain, */*",
			
 
				-            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
			
 
				-            "Connection": "keep-alive",
			
 
				-            "Cache-Control": "no-cache",
			
 
				-        }
			
 
				-
			
 
				-        retries = 0
			
 
				-        while retries <= max_retries:
			
 
				-            try:
			
 
				-                response = requests.get(
			
 
				-                    url, proxies=proxies, headers=headers, timeout=10
			
 
				-                )
			
 
				-                response.raise_for_status()
			
 
				-
			
 
				-                data_text = response.text
			
 
				-                data_json = json.loads(data_text)
			
 
				-
			
 
				-                status = data_json.get("status", "未知")
			
 
				-                if status not in ["success", "cache"]:
			
 
				-                    raise ValueError(f"响应状态异常: {status}")
			
 
				-
			
 
				-                status_info = "最新数据" if status == "success" else "缓存数据"
			
 
				-                print(f"获取 {id_value} 成功（{status_info}）")
			
 
				-                return data_text, id_value, alias
			
 
				-
			
 
				-            except Exception as e:
			
 
				-                retries += 1
			
 
				-                if retries <= max_retries:
			
 
				-                    base_wait = random.uniform(min_retry_wait, max_retry_wait)
			
 
				-                    additional_wait = (retries - 1) * random.uniform(1, 2)
			
 
				-                    wait_time = base_wait + additional_wait
			
 
				-                    print(f"请求 {id_value} 失败: {e}. {wait_time:.2f}秒后重试...")
			
 
				-                    time.sleep(wait_time)
			
 
				-                else:
			
 
				-                    print(f"请求 {id_value} 失败: {e}")
			
 
				-                    return None, id_value, alias
			
 
				-        return None, id_value, alias
			
 
				-
			
 
				-    def crawl_websites(
			
 
				-        self,
			
 
				-        ids_list: List[Union[str, Tuple[str, str]]],
			
 
				-        request_interval: int = CONFIG["REQUEST_INTERVAL"],
			
 
				-    ) -> Tuple[Dict, Dict, List]:
			
 
				-        """爬取多个网站数据"""
			
 
				-        results = {}
			
 
				-        id_to_name = {}
			
 
				-        failed_ids = []
			
 
				-
			
 
				-        for i, id_info in enumerate(ids_list):
			
 
				-            if isinstance(id_info, tuple):
			
 
				-                id_value, name = id_info
			
 
				-            else:
			
 
				-                id_value = id_info
			
 
				-                name = id_value
			
 
				-
			
 
				-            id_to_name[id_value] = name
			
 
				-            response, _, _ = self.fetch_data(id_info)
			
 
				-
			
 
				-            if response:
			
 
				-                try:
			
 
				-                    data = json.loads(response)
			
 
				-                    results[id_value] = {}
			
 
				-                    for index, item in enumerate(data.get("items", []), 1):
			
 
				-                        title = item.get("title")
			
 
				-                        # 跳过无效标题（None、float、空字符串）
			
 
				-                        if title is None or isinstance(title, float) or not str(title).strip():
			
 
				-                            continue
			
 
				-                        title = str(title).strip()
			
 
				-                        url = item.get("url", "")
			
 
				-                        mobile_url = item.get("mobileUrl", "")
			
 
				-
			
 
				-                        if title in results[id_value]:
			
 
				-                            results[id_value][title]["ranks"].append(index)
			
 
				-                        else:
			
 
				-                            results[id_value][title] = {
			
 
				-                                "ranks": [index],
			
 
				-                                "url": url,
			
 
				-                                "mobileUrl": mobile_url,
			
 
				-                            }
			
 
				-                except json.JSONDecodeError:
			
 
				-                    print(f"解析 {id_value} 响应失败")
			
 
				-                    failed_ids.append(id_value)
			
 
				-                except Exception as e:
			
 
				-                    print(f"处理 {id_value} 数据出错: {e}")
			
 
				-                    failed_ids.append(id_value)
			
 
				-            else:
			
 
				-                failed_ids.append(id_value)
			
 
				-
			
 
				-            if i < len(ids_list) - 1:
			
 
				-                actual_interval = request_interval + random.randint(-10, 20)
			
 
				-                actual_interval = max(50, actual_interval)
			
 
				-                time.sleep(actual_interval / 1000)
			
 
				-
			
 
				-        print(f"成功: {list(results.keys())}, 失败: {failed_ids}")
			
 
				-        return results, id_to_name, failed_ids
			
 
				-
			
 
				-
			
 
				-# === 数据处理 ===
			
 
				-def save_titles_to_file(results: Dict, id_to_name: Dict, failed_ids: List) -> str:
			
 
				-    """保存标题到文件"""
			
 
				-    file_path = get_output_path("txt", f"{format_time_filename()}.txt")
			
 
				-
			
 
				-    with open(file_path, "w", encoding="utf-8") as f:
			
 
				-        for id_value, title_data in results.items():
			
 
				-            # id | name 或 id
			
 
				-            name = id_to_name.get(id_value)
			
 
				-            if name and name != id_value:
			
 
				-                f.write(f"{id_value} | {name}\n")
			
 
				-            else:
			
 
				-                f.write(f"{id_value}\n")
			
 
				-
			
 
				-            # 按排名排序标题
			
 
				-            sorted_titles = []
			
 
				-            for title, info in title_data.items():
			
 
				-                cleaned_title = clean_title(title)
			
 
				-                if isinstance(info, dict):
			
 
				-                    ranks = info.get("ranks", [])
			
 
				-                    url = info.get("url", "")
			
 
				-                    mobile_url = info.get("mobileUrl", "")
			
 
				-                else:
			
 
				-                    ranks = info if isinstance(info, list) else []
			
 
				-                    url = ""
			
 
				-                    mobile_url = ""
			
 
				-
			
 
				-                rank = ranks[0] if ranks else 1
			
 
				-                sorted_titles.append((rank, cleaned_title, url, mobile_url))
			
 
				-
			
 
				-            sorted_titles.sort(key=lambda x: x[0])
			
 
				-
			
 
				-            for rank, cleaned_title, url, mobile_url in sorted_titles:
			
 
				-                line = f"{rank}. {cleaned_title}"
			
 
				-
			
 
				-                if url:
			
 
				-                    line += f" [URL:{url}]"
			
 
				-                if mobile_url:
			
 
				-                    line += f" [MOBILE:{mobile_url}]"
			
 
				-                f.write(line + "\n")
			
 
				-
			
 
				-            f.write("\n")
			
 
				-
			
 
				-        if failed_ids:
			
 
				-            f.write("==== 以下ID请求失败 ====\n")
			
 
				-            for id_value in failed_ids:
			
 
				-                f.write(f"{id_value}\n")
			
 
				-
			
 
				-    return file_path
			
 
				-
			
 
				-
			
 
				-def load_frequency_words(
			
 
				-    frequency_file: Optional[str] = None,
			
 
				-) -> Tuple[List[Dict], List[str], List[str]]:
			
 
				-    """
			
 
				-    加载频率词配置
			
 
				-
			
 
				-    Returns:
			
 
				-        (词组列表, 词组内过滤词, 全局过滤词)
			
 
				-    """
			
 
				-    if frequency_file is None:
			
 
				-        frequency_file = os.environ.get(
			
 
				-            "FREQUENCY_WORDS_PATH", "config/frequency_words.txt"
			
 
				-        )
			
 
				-
			
 
				-    frequency_path = Path(frequency_file)
			
 
				-    if not frequency_path.exists():
			
 
				-        raise FileNotFoundError(f"频率词文件 {frequency_file} 不存在")
			
 
				-
			
 
				-    with open(frequency_path, "r", encoding="utf-8") as f:
			
 
				-        content = f.read()
			
 
				-
			
 
				-    word_groups = [group.strip() for group in content.split("\n\n") if group.strip()]
			
 
				-
			
 
				-    processed_groups = []
			
 
				-    filter_words = []
			
 
				-    global_filters = []  # 新增：全局过滤词列表
			
 
				-
			
 
				-    # 默认区域（向后兼容）
			
 
				-    current_section = "WORD_GROUPS"
			
 
				-
			
 
				-    for group in word_groups:
			
 
				-        lines = [line.strip() for line in group.split("\n") if line.strip()]
			
 
				-
			
 
				-        if not lines:
			
 
				-            continue
			
 
				-
			
 
				-        # 检查是否为区域标记
			
 
				-        if lines[0].startswith("[") and lines[0].endswith("]"):
			
 
				-            section_name = lines[0][1:-1].upper()
			
 
				-            if section_name in ("GLOBAL_FILTER", "WORD_GROUPS"):
			
 
				-                current_section = section_name
			
 
				-                lines = lines[1:]  # 移除标记行
			
 
				-
			
 
				-        # 处理全局过滤区域
			
 
				-        if current_section == "GLOBAL_FILTER":
			
 
				-            # 直接添加所有非空行到全局过滤列表
			
 
				-            for line in lines:
			
 
				-                # 忽略特殊语法前缀，只提取纯文本
			
 
				-                if line.startswith(("!", "+", "@")):
			
 
				-                    continue  # 全局过滤区不支持特殊语法
			
 
				-                if line:
			
 
				-                    global_filters.append(line)
			
 
				-            continue
			
 
				-
			
 
				-        # 处理词组区域（保持现有逻辑）
			
 
				-        words = lines
			
 
				-
			
 
				-        group_required_words = []
			
 
				-        group_normal_words = []
			
 
				-        group_filter_words = []
			
 
				-        group_max_count = 0  # 默认不限制
			
 
				-
			
 
				-        for word in words:
			
 
				-            if word.startswith("@"):
			
 
				-                # 解析最大显示数量（只接受正整数）
			
 
				-                try:
			
 
				-                    count = int(word[1:])
			
 
				-                    if count > 0:
			
 
				-                        group_max_count = count
			
 
				-                except (ValueError, IndexError):
			
 
				-                    pass  # 忽略无效的@数字格式
			
 
				-            elif word.startswith("!"):
			
 
				-                filter_words.append(word[1:])
			
 
				-                group_filter_words.append(word[1:])
			
 
				-            elif word.startswith("+"):
			
 
				-                group_required_words.append(word[1:])
			
 
				-            else:
			
 
				-                group_normal_words.append(word)
			
 
				-
			
 
				-        if group_required_words or group_normal_words:
			
 
				-            if group_normal_words:
			
 
				-                group_key = " ".join(group_normal_words)
			
 
				-            else:
			
 
				-                group_key = " ".join(group_required_words)
			
 
				-
			
 
				-            processed_groups.append(
			
 
				-                {
			
 
				-                    "required": group_required_words,
			
 
				-                    "normal": group_normal_words,
			
 
				-                    "group_key": group_key,
			
 
				-                    "max_count": group_max_count,  # 新增字段
			
 
				-                }
			
 
				-            )
			
 
				-
			
 
				-    return processed_groups, filter_words, global_filters
			
 
				-
			
 
				-
			
 
				-def parse_file_titles(file_path: Path) -> Tuple[Dict, Dict]:
			
 
				-    """解析单个txt文件的标题数据，返回(titles_by_id, id_to_name)"""
			
 
				-    titles_by_id = {}
			
 
				-    id_to_name = {}
			
 
				-
			
 
				-    with open(file_path, "r", encoding="utf-8") as f:
			
 
				-        content = f.read()
			
 
				-        sections = content.split("\n\n")
			
 
				-
			
 
				-        for section in sections:
			
 
				-            if not section.strip() or "==== 以下ID请求失败 ====" in section:
			
 
				-                continue
			
 
				-
			
 
				-            lines = section.strip().split("\n")
			
 
				-            if len(lines) < 2:
			
 
				-                continue
			
 
				-
			
 
				-            # id | name 或 id
			
 
				-            header_line = lines[0].strip()
			
 
				-            if " | " in header_line:
			
 
				-                parts = header_line.split(" | ", 1)
			
 
				-                source_id = parts[0].strip()
			
 
				-                name = parts[1].strip()
			
 
				-                id_to_name[source_id] = name
			
 
				-            else:
			
 
				-                source_id = header_line
			
 
				-                id_to_name[source_id] = source_id
			
 
				-
			
 
				-            titles_by_id[source_id] = {}
			
 
				-
			
 
				-            for line in lines[1:]:
			
 
				-                if line.strip():
			
 
				-                    try:
			
 
				-                        title_part = line.strip()
			
 
				-                        rank = None
			
 
				-
			
 
				-                        # 提取排名
			
 
				-                        if ". " in title_part and title_part.split(". ")[0].isdigit():
			
 
				-                            rank_str, title_part = title_part.split(". ", 1)
			
 
				-                            rank = int(rank_str)
			
 
				-
			
 
				-                        # 提取 MOBILE URL
			
 
				-                        mobile_url = ""
			
 
				-                        if " [MOBILE:" in title_part:
			
 
				-                            title_part, mobile_part = title_part.rsplit(" [MOBILE:", 1)
			
 
				-                            if mobile_part.endswith("]"):
			
 
				-                                mobile_url = mobile_part[:-1]
			
 
				-
			
 
				-                        # 提取 URL
			
 
				-                        url = ""
			
 
				-                        if " [URL:" in title_part:
			
 
				-                            title_part, url_part = title_part.rsplit(" [URL:", 1)
			
 
				-                            if url_part.endswith("]"):
			
 
				-                                url = url_part[:-1]
			
 
				-
			
 
				-                        title = clean_title(title_part.strip())
			
 
				-                        ranks = [rank] if rank is not None else [1]
			
 
				-
			
 
				-                        titles_by_id[source_id][title] = {
			
 
				-                            "ranks": ranks,
			
 
				-                            "url": url,
			
 
				-                            "mobileUrl": mobile_url,
			
 
				-                        }
			
 
				-
			
 
				-                    except Exception as e:
			
 
				-                        print(f"解析标题行出错: {line}, 错误: {e}")
			
 
				-
			
 
				-    return titles_by_id, id_to_name
			
 
				-
			
 
				-
			
 
				-def read_all_today_titles(
			
 
				-    current_platform_ids: Optional[List[str]] = None,
			
 
				-) -> Tuple[Dict, Dict, Dict]:
			
 
				-    """读取当天所有标题文件，支持按当前监控平台过滤"""
			
 
				-    date_folder = format_date_folder()
			
 
				-    txt_dir = Path("output") / date_folder / "txt"
			
 
				-
			
 
				-    if not txt_dir.exists():
			
 
				-        return {}, {}, {}
			
 
				-
			
 
				-    all_results = {}
			
 
				-    final_id_to_name = {}
			
 
				-    title_info = {}
			
 
				-
			
 
				-    files = sorted([f for f in txt_dir.iterdir() if f.suffix == ".txt"])
			
 
				-
			
 
				-    for file_path in files:
			
 
				-        time_info = file_path.stem
			
 
				-
			
 
				-        titles_by_id, file_id_to_name = parse_file_titles(file_path)
			
 
				-
			
 
				-        if current_platform_ids is not None:
			
 
				-            filtered_titles_by_id = {}
			
 
				-            filtered_id_to_name = {}
			
 
				-
			
 
				-            for source_id, title_data in titles_by_id.items():
			
 
				-                if source_id in current_platform_ids:
			
 
				-                    filtered_titles_by_id[source_id] = title_data
			
 
				-                    if source_id in file_id_to_name:
			
 
				-                        filtered_id_to_name[source_id] = file_id_to_name[source_id]
			
 
				-
			
 
				-            titles_by_id = filtered_titles_by_id
			
 
				-            file_id_to_name = filtered_id_to_name
			
 
				-
			
 
				-        final_id_to_name.update(file_id_to_name)
			
 
				-
			
 
				-        for source_id, title_data in titles_by_id.items():
			
 
				-            process_source_data(
			
 
				-                source_id, title_data, time_info, all_results, title_info
			
 
				-            )
			
 
				-
			
 
				-    return all_results, final_id_to_name, title_info
			
 
				-
			
 
				-
			
 
				-def process_source_data(
			
 
				-    source_id: str,
			
 
				-    title_data: Dict,
			
 
				-    time_info: str,
			
 
				-    all_results: Dict,
			
 
				-    title_info: Dict,
			
 
				-) -> None:
			
 
				-    """处理来源数据，合并重复标题"""
			
 
				-    if source_id not in all_results:
			
 
				-        all_results[source_id] = title_data
			
 
				-
			
 
				-        if source_id not in title_info:
			
 
				-            title_info[source_id] = {}
			
 
				-
			
 
				-        for title, data in title_data.items():
			
 
				-            ranks = data.get("ranks", [])
			
 
				-            url = data.get("url", "")
			
 
				-            mobile_url = data.get("mobileUrl", "")
			
 
				-
			
 
				-            title_info[source_id][title] = {
			
 
				-                "first_time": time_info,
			
 
				-                "last_time": time_info,
			
 
				-                "count": 1,
			
 
				-                "ranks": ranks,
			
 
				-                "url": url,
			
 
				-                "mobileUrl": mobile_url,
			
 
				-            }
			
 
				-    else:
			
 
				-        for title, data in title_data.items():
			
 
				-            ranks = data.get("ranks", [])
			
 
				-            url = data.get("url", "")
			
 
				-            mobile_url = data.get("mobileUrl", "")
			
 
				-
			
 
				-            if title not in all_results[source_id]:
			
 
				-                all_results[source_id][title] = {
			
 
				-                    "ranks": ranks,
			
 
				-                    "url": url,
			
 
				-                    "mobileUrl": mobile_url,
			
 
				-                }
			
 
				-                title_info[source_id][title] = {
			
 
				-                    "first_time": time_info,
			
 
				-                    "last_time": time_info,
			
 
				-                    "count": 1,
			
 
				-                    "ranks": ranks,
			
 
				-                    "url": url,
			
 
				-                    "mobileUrl": mobile_url,
			
 
				-                }
			
 
				-            else:
			
 
				-                existing_data = all_results[source_id][title]
			
 
				-                existing_ranks = existing_data.get("ranks", [])
			
 
				-                existing_url = existing_data.get("url", "")
			
 
				-                existing_mobile_url = existing_data.get("mobileUrl", "")
			
 
				-
			
 
				-                merged_ranks = existing_ranks.copy()
			
 
				-                for rank in ranks:
			
 
				-                    if rank not in merged_ranks:
			
 
				-                        merged_ranks.append(rank)
			
 
				-
			
 
				-                all_results[source_id][title] = {
			
 
				-                    "ranks": merged_ranks,
			
 
				-                    "url": existing_url or url,
			
 
				-                    "mobileUrl": existing_mobile_url or mobile_url,
			
 
				-                }
			
 
				-
			
 
				-                title_info[source_id][title]["last_time"] = time_info
			
 
				-                title_info[source_id][title]["ranks"] = merged_ranks
			
 
				-                title_info[source_id][title]["count"] += 1
			
 
				-                if not title_info[source_id][title].get("url"):
			
 
				-                    title_info[source_id][title]["url"] = url
			
 
				-                if not title_info[source_id][title].get("mobileUrl"):
			
 
				-                    title_info[source_id][title]["mobileUrl"] = mobile_url
			
 
				-
			
 
				-
			
 
				-def detect_latest_new_titles(current_platform_ids: Optional[List[str]] = None) -> Dict:
			
 
				-    """检测当日最新批次的新增标题，支持按当前监控平台过滤"""
			
 
				-    date_folder = format_date_folder()
			
 
				-    txt_dir = Path("output") / date_folder / "txt"
			
 
				-
			
 
				-    if not txt_dir.exists():
			
 
				-        return {}
			
 
				-
			
 
				-    files = sorted([f for f in txt_dir.iterdir() if f.suffix == ".txt"])
			
 
				-    if len(files) < 2:
			
 
				-        return {}
			
 
				-
			
 
				-    # 解析最新文件
			
 
				-    latest_file = files[-1]
			
 
				-    latest_titles, _ = parse_file_titles(latest_file)
			
 
				-
			
 
				-    # 如果指定了当前平台列表，过滤最新文件数据
			
 
				-    if current_platform_ids is not None:
			
 
				-        filtered_latest_titles = {}
			
 
				-        for source_id, title_data in latest_titles.items():
			
 
				-            if source_id in current_platform_ids:
			
 
				-                filtered_latest_titles[source_id] = title_data
			
 
				-        latest_titles = filtered_latest_titles
			
 
				-
			
 
				-    # 汇总历史标题（按平台过滤）
			
 
				-    historical_titles = {}
			
 
				-    for file_path in files[:-1]:
			
 
				-        historical_data, _ = parse_file_titles(file_path)
			
 
				-
			
 
				-        # 过滤历史数据
			
 
				-        if current_platform_ids is not None:
			
 
				-            filtered_historical_data = {}
			
 
				-            for source_id, title_data in historical_data.items():
			
 
				-                if source_id in current_platform_ids:
			
 
				-                    filtered_historical_data[source_id] = title_data
			
 
				-            historical_data = filtered_historical_data
			
 
				-
			
 
				-        for source_id, titles_data in historical_data.items():
			
 
				-            if source_id not in historical_titles:
			
 
				-                historical_titles[source_id] = set()
			
 
				-            for title in titles_data.keys():
			
 
				-                historical_titles[source_id].add(title)
			
 
				-
			
 
				-    # 找出新增标题
			
 
				-    new_titles = {}
			
 
				-    for source_id, latest_source_titles in latest_titles.items():
			
 
				-        historical_set = historical_titles.get(source_id, set())
			
 
				-        source_new_titles = {}
			
 
				-
			
 
				-        for title, title_data in latest_source_titles.items():
			
 
				-            if title not in historical_set:
			
 
				-                source_new_titles[title] = title_data
			
 
				-
			
 
				-        if source_new_titles:
			
 
				-            new_titles[source_id] = source_new_titles
			
 
				-
			
 
				-    return new_titles
			
 
				-
			
 
				-
			
 
				-# === 统计和分析 ===
			
 
				-def calculate_news_weight(
			
 
				-    title_data: Dict, rank_threshold: int = CONFIG["RANK_THRESHOLD"]
			
 
				-) -> float:
			
 
				-    """计算新闻权重，用于排序"""
			
 
				-    ranks = title_data.get("ranks", [])
			
 
				-    if not ranks:
			
 
				-        return 0.0
			
 
				-
			
 
				-    count = title_data.get("count", len(ranks))
			
 
				-    weight_config = CONFIG["WEIGHT_CONFIG"]
			
 
				-
			
 
				-    # 排名权重：Σ(11 - min(rank, 10)) / 出现次数
			
 
				-    rank_scores = []
			
 
				-    for rank in ranks:
			
 
				-        score = 11 - min(rank, 10)
			
 
				-        rank_scores.append(score)
			
 
				-
			
 
				-    rank_weight = sum(rank_scores) / len(ranks) if ranks else 0
			
 
				-
			
 
				-    # 频次权重：min(出现次数, 10) × 10
			
 
				-    frequency_weight = min(count, 10) * 10
			
 
				-
			
 
				-    # 热度加成：高排名次数 / 总出现次数 × 100
			
 
				-    high_rank_count = sum(1 for rank in ranks if rank <= rank_threshold)
			
 
				-    hotness_ratio = high_rank_count / len(ranks) if ranks else 0
			
 
				-    hotness_weight = hotness_ratio * 100
			
 
				-
			
 
				-    total_weight = (
			
 
				-        rank_weight * weight_config["RANK_WEIGHT"]
			
 
				-        + frequency_weight * weight_config["FREQUENCY_WEIGHT"]
			
 
				-        + hotness_weight * weight_config["HOTNESS_WEIGHT"]
			
 
				-    )
			
 
				-
			
 
				-    return total_weight
			
 
				-
			
 
				-
			
 
				-def matches_word_groups(
			
 
				-    title: str, word_groups: List[Dict], filter_words: List[str], global_filters: Optional[List[str]] = None
			
 
				-) -> bool:
			
 
				-    """检查标题是否匹配词组规则"""
			
 
				-    # 防御性类型检查：确保 title 是有效字符串
			
 
				-    if not isinstance(title, str):
			
 
				-        title = str(title) if title is not None else ""
			
 
				-    if not title.strip():
			
 
				-        return False
			
 
				-
			
 
				-    title_lower = title.lower()
			
 
				-
			
 
				-    # 全局过滤检查（优先级最高）
			
 
				-    if global_filters:
			
 
				-        if any(global_word.lower() in title_lower for global_word in global_filters):
			
 
				-            return False
			
 
				-
			
 
				-    # 如果没有配置词组，则匹配所有标题（支持显示全部新闻）
			
 
				-    if not word_groups:
			
 
				-        return True
			
 
				-
			
 
				-    # 过滤词检查
			
 
				-    if any(filter_word.lower() in title_lower for filter_word in filter_words):
			
 
				-        return False
			
 
				-
			
 
				-    # 词组匹配检查
			
 
				-    for group in word_groups:
			
 
				-        required_words = group["required"]
			
 
				-        normal_words = group["normal"]
			
 
				-
			
 
				-        # 必须词检查
			
 
				-        if required_words:
			
 
				-            all_required_present = all(
			
 
				-                req_word.lower() in title_lower for req_word in required_words
			
 
				-            )
			
 
				-            if not all_required_present:
			
 
				-                continue
			
 
				-
			
 
				-        # 普通词检查
			
 
				-        if normal_words:
			
 
				-            any_normal_present = any(
			
 
				-                normal_word.lower() in title_lower for normal_word in normal_words
			
 
				-            )
			
 
				-            if not any_normal_present:
			
 
				-                continue
			
 
				-
			
 
				-        return True
			
 
				-
			
 
				-    return False
			
 
				-
			
 
				-
			
 
				-def format_time_display(first_time: str, last_time: str) -> str:
			
 
				-    """格式化时间显示"""
			
 
				-    if not first_time:
			
 
				-        return ""
			
 
				-    if first_time == last_time or not last_time:
			
 
				-        return first_time
			
 
				-    else:
			
 
				-        return f"[{first_time} ~ {last_time}]"
			
 
				-
			
 
				-
			
 
				-def format_rank_display(ranks: List[int], rank_threshold: int, format_type: str) -> str:
			
 
				-    """统一的排名格式化方法"""
			
 
				-    if not ranks:
			
 
				-        return ""
			
 
				-
			
 
				-    unique_ranks = sorted(set(ranks))
			
 
				-    min_rank = unique_ranks[0]
			
 
				-    max_rank = unique_ranks[-1]
			
 
				-
			
 
				-    if format_type == "html":
			
 
				-        highlight_start = "<font color='red'><strong>"
			
 
				-        highlight_end = "</strong></font>"
			
 
				-    elif format_type == "feishu":
			
 
				-        highlight_start = "<font color='red'>**"
			
 
				-        highlight_end = "**</font>"
			
 
				-    elif format_type == "dingtalk":
			
 
				-        highlight_start = "**"
			
 
				-        highlight_end = "**"
			
 
				-    elif format_type == "wework":
			
 
				-        highlight_start = "**"
			
 
				-        highlight_end = "**"
			
 
				-    elif format_type == "telegram":
			
 
				-        highlight_start = "<b>"
			
 
				-        highlight_end = "</b>"
			
 
				-    elif format_type == "slack":
			
 
				-        highlight_start = "*"
			
 
				-        highlight_end = "*"
			
 
				-    else:
			
 
				-        highlight_start = "**"
			
 
				-        highlight_end = "**"
			
 
				-
			
 
				-    if min_rank <= rank_threshold:
			
 
				-        if min_rank == max_rank:
			
 
				-            return f"{highlight_start}[{min_rank}]{highlight_end}"
			
 
				-        else:
			
 
				-            return f"{highlight_start}[{min_rank} - {max_rank}]{highlight_end}"
			
 
				-    else:
			
 
				-        if min_rank == max_rank:
			
 
				-            return f"[{min_rank}]"
			
 
				-        else:
			
 
				-            return f"[{min_rank} - {max_rank}]"
			
 
				-
			
 
				-
			
 
				-def count_word_frequency(
			
 
				-    results: Dict,
			
 
				-    word_groups: List[Dict],
			
 
				-    filter_words: List[str],
			
 
				-    id_to_name: Dict,
			
 
				-    title_info: Optional[Dict] = None,
			
 
				-    rank_threshold: int = CONFIG["RANK_THRESHOLD"],
			
 
				-    new_titles: Optional[Dict] = None,
			
 
				-    mode: str = "daily",
			
 
				-    global_filters: Optional[List[str]] = None,
			
 
				-) -> Tuple[List[Dict], int]:
			
 
				-    """统计词频，支持必须词、频率词、过滤词、全局过滤词，并标记新增标题"""
			
 
				-
			
 
				-    # 如果没有配置词组，创建一个包含所有新闻的虚拟词组
			
 
				-    if not word_groups:
			
 
				-        print("频率词配置为空，将显示所有新闻")
			
 
				-        word_groups = [{"required": [], "normal": [], "group_key": "全部新闻"}]
			
 
				-        filter_words = []  # 清空过滤词，显示所有新闻
			
 
				-
			
 
				-    is_first_today = is_first_crawl_today()
			
 
				-
			
 
				-    # 确定处理的数据源和新增标记逻辑
			
 
				-    if mode == "incremental":
			
 
				-        if is_first_today:
			
 
				-            # 增量模式 + 当天第一次：处理所有新闻，都标记为新增
			
 
				-            results_to_process = results
			
 
				-            all_news_are_new = True
			
 
				-        else:
			
 
				-            # 增量模式 + 当天非第一次：只处理新增的新闻
			
 
				-            results_to_process = new_titles if new_titles else {}
			
 
				-            all_news_are_new = True
			
 
				-    elif mode == "current":
			
 
				-        # current 模式：只处理当前时间批次的新闻，但统计信息来自全部历史
			
 
				-        if title_info:
			
 
				-            latest_time = None
			
 
				-            for source_titles in title_info.values():
			
 
				-                for title_data in source_titles.values():
			
 
				-                    last_time = title_data.get("last_time", "")
			
 
				-                    if last_time:
			
 
				-                        if latest_time is None or last_time > latest_time:
			
 
				-                            latest_time = last_time
			
 
				-
			
 
				-            # 只处理 last_time 等于最新时间的新闻
			
 
				-            if latest_time:
			
 
				-                results_to_process = {}
			
 
				-                for source_id, source_titles in results.items():
			
 
				-                    if source_id in title_info:
			
 
				-                        filtered_titles = {}
			
 
				-                        for title, title_data in source_titles.items():
			
 
				-                            if title in title_info[source_id]:
			
 
				-                                info = title_info[source_id][title]
			
 
				-                                if info.get("last_time") == latest_time:
			
 
				-                                    filtered_titles[title] = title_data
			
 
				-                        if filtered_titles:
			
 
				-                            results_to_process[source_id] = filtered_titles
			
 
				-
			
 
				-                print(
			
 
				-                    f"当前榜单模式：最新时间 {latest_time}，筛选出 {sum(len(titles) for titles in results_to_process.values())} 条当前榜单新闻"
			
 
				-                )
			
 
				-            else:
			
 
				-                results_to_process = results
			
 
				-        else:
			
 
				-            results_to_process = results
			
 
				-        all_news_are_new = False
			
 
				-    else:
			
 
				-        # 当日汇总模式：处理所有新闻
			
 
				-        results_to_process = results
			
 
				-        all_news_are_new = False
			
 
				-        total_input_news = sum(len(titles) for titles in results.values())
			
 
				-        filter_status = (
			
 
				-            "全部显示"
			
 
				-            if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻"
			
 
				-            else "频率词过滤"
			
 
				-        )
			
 
				-        print(f"当日汇总模式：处理 {total_input_news} 条新闻，模式：{filter_status}")
			
 
				-
			
 
				-    word_stats = {}
			
 
				-    total_titles = 0
			
 
				-    processed_titles = {}
			
 
				-    matched_new_count = 0
			
 
				-
			
 
				-    if title_info is None:
			
 
				-        title_info = {}
			
 
				-    if new_titles is None:
			
 
				-        new_titles = {}
			
 
				-
			
 
				-    for group in word_groups:
			
 
				-        group_key = group["group_key"]
			
 
				-        word_stats[group_key] = {"count": 0, "titles": {}}
			
 
				-
			
 
				-    for source_id, titles_data in results_to_process.items():
			
 
				-        total_titles += len(titles_data)
			
 
				-
			
 
				-        if source_id not in processed_titles:
			
 
				-            processed_titles[source_id] = {}
			
 
				-
			
 
				-        for title, title_data in titles_data.items():
			
 
				-            if title in processed_titles.get(source_id, {}):
			
 
				-                continue
			
 
				-
			
 
				-            # 使用统一的匹配逻辑
			
 
				-            matches_frequency_words = matches_word_groups(
			
 
				-                title, word_groups, filter_words, global_filters
			
 
				-            )
			
 
				-
			
 
				-            if not matches_frequency_words:
			
 
				-                continue
			
 
				-
			
 
				-            # 如果是增量模式或 current 模式第一次，统计匹配的新增新闻数量
			
 
				-            if (mode == "incremental" and all_news_are_new) or (
			
 
				-                mode == "current" and is_first_today
			
 
				-            ):
			
 
				-                matched_new_count += 1
			
 
				-
			
 
				-            source_ranks = title_data.get("ranks", [])
			
 
				-            source_url = title_data.get("url", "")
			
 
				-            source_mobile_url = title_data.get("mobileUrl", "")
			
 
				-
			
 
				-            # 找到匹配的词组（防御性转换确保类型安全）
			
 
				-            title_lower = str(title).lower() if not isinstance(title, str) else title.lower()
			
 
				-            for group in word_groups:
			
 
				-                required_words = group["required"]
			
 
				-                normal_words = group["normal"]
			
 
				-
			
 
				-                # 如果是"全部新闻"模式，所有标题都匹配第一个（唯一的）词组
			
 
				-                if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻":
			
 
				-                    group_key = group["group_key"]
			
 
				-                    word_stats[group_key]["count"] += 1
			
 
				-                    if source_id not in word_stats[group_key]["titles"]:
			
 
				-                        word_stats[group_key]["titles"][source_id] = []
			
 
				-                else:
			
 
				-                    # 原有的匹配逻辑
			
 
				-                    if required_words:
			
 
				-                        all_required_present = all(
			
 
				-                            req_word.lower() in title_lower
			
 
				-                            for req_word in required_words
			
 
				-                        )
			
 
				-                        if not all_required_present:
			
 
				-                            continue
			
 
				-
			
 
				-                    if normal_words:
			
 
				-                        any_normal_present = any(
			
 
				-                            normal_word.lower() in title_lower
			
 
				-                            for normal_word in normal_words
			
 
				-                        )
			
 
				-                        if not any_normal_present:
			
 
				-                            continue
			
 
				-
			
 
				-                    group_key = group["group_key"]
			
 
				-                    word_stats[group_key]["count"] += 1
			
 
				-                    if source_id not in word_stats[group_key]["titles"]:
			
 
				-                        word_stats[group_key]["titles"][source_id] = []
			
 
				-
			
 
				-                first_time = ""
			
 
				-                last_time = ""
			
 
				-                count_info = 1
			
 
				-                ranks = source_ranks if source_ranks else []
			
 
				-                url = source_url
			
 
				-                mobile_url = source_mobile_url
			
 
				-
			
 
				-                # 对于 current 模式，从历史统计信息中获取完整数据
			
 
				-                if (
			
 
				-                    mode == "current"
			
 
				-                    and title_info
			
 
				-                    and source_id in title_info
			
 
				-                    and title in title_info[source_id]
			
 
				-                ):
			
 
				-                    info = title_info[source_id][title]
			
 
				-                    first_time = info.get("first_time", "")
			
 
				-                    last_time = info.get("last_time", "")
			
 
				-                    count_info = info.get("count", 1)
			
 
				-                    if "ranks" in info and info["ranks"]:
			
 
				-                        ranks = info["ranks"]
			
 
				-                    url = info.get("url", source_url)
			
 
				-                    mobile_url = info.get("mobileUrl", source_mobile_url)
			
 
				-                elif (
			
 
				-                    title_info
			
 
				-                    and source_id in title_info
			
 
				-                    and title in title_info[source_id]
			
 
				-                ):
			
 
				-                    info = title_info[source_id][title]
			
 
				-                    first_time = info.get("first_time", "")
			
 
				-                    last_time = info.get("last_time", "")
			
 
				-                    count_info = info.get("count", 1)
			
 
				-                    if "ranks" in info and info["ranks"]:
			
 
				-                        ranks = info["ranks"]
			
 
				-                    url = info.get("url", source_url)
			
 
				-                    mobile_url = info.get("mobileUrl", source_mobile_url)
			
 
				-
			
 
				-                if not ranks:
			
 
				-                    ranks = [99]
			
 
				-
			
 
				-                time_display = format_time_display(first_time, last_time)
			
 
				-
			
 
				-                source_name = id_to_name.get(source_id, source_id)
			
 
				-
			
 
				-                # 判断是否为新增
			
 
				-                is_new = False
			
 
				-                if all_news_are_new:
			
 
				-                    # 增量模式下所有处理的新闻都是新增，或者当天第一次的所有新闻都是新增
			
 
				-                    is_new = True
			
 
				-                elif new_titles and source_id in new_titles:
			
 
				-                    # 检查是否在新增列表中
			
 
				-                    new_titles_for_source = new_titles[source_id]
			
 
				-                    is_new = title in new_titles_for_source
			
 
				-
			
 
				-                word_stats[group_key]["titles"][source_id].append(
			
 
				-                    {
			
 
				-                        "title": title,
			
 
				-                        "source_name": source_name,
			
 
				-                        "first_time": first_time,
			
 
				-                        "last_time": last_time,
			
 
				-                        "time_display": time_display,
			
 
				-                        "count": count_info,
			
 
				-                        "ranks": ranks,
			
 
				-                        "rank_threshold": rank_threshold,
			
 
				-                        "url": url,
			
 
				-                        "mobileUrl": mobile_url,
			
 
				-                        "is_new": is_new,
			
 
				-                    }
			
 
				-                )
			
 
				-
			
 
				-                if source_id not in processed_titles:
			
 
				-                    processed_titles[source_id] = {}
			
 
				-                processed_titles[source_id][title] = True
			
 
				-
			
 
				-                break
			
 
				-
			
 
				-    # 最后统一打印汇总信息
			
 
				-    if mode == "incremental":
			
 
				-        if is_first_today:
			
 
				-            total_input_news = sum(len(titles) for titles in results.values())
			
 
				-            filter_status = (
			
 
				-                "全部显示"
			
 
				-                if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻"
			
 
				-                else "频率词匹配"
			
 
				-            )
			
 
				-            print(
			
 
				-                f"增量模式：当天第一次爬取，{total_input_news} 条新闻中有 {matched_new_count} 条{filter_status}"
			
 
				-            )
			
 
				-        else:
			
 
				-            if new_titles:
			
 
				-                total_new_count = sum(len(titles) for titles in new_titles.values())
			
 
				-                filter_status = (
			
 
				-                    "全部显示"
			
 
				-                    if len(word_groups) == 1
			
 
				-                    and word_groups[0]["group_key"] == "全部新闻"
			
 
				-                    else "匹配频率词"
			
 
				-                )
			
 
				-                print(
			
 
				-                    f"增量模式：{total_new_count} 条新增新闻中，有 {matched_new_count} 条{filter_status}"
			
 
				-                )
			
 
				-                if matched_new_count == 0 and len(word_groups) > 1:
			
 
				-                    print("增量模式：没有新增新闻匹配频率词，将不会发送通知")
			
 
				-            else:
			
 
				-                print("增量模式：未检测到新增新闻")
			
 
				-    elif mode == "current":
			
 
				-        total_input_news = sum(len(titles) for titles in results_to_process.values())
			
 
				-        if is_first_today:
			
 
				-            filter_status = (
			
 
				-                "全部显示"
			
 
				-                if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻"
			
 
				-                else "频率词匹配"
			
 
				-            )
			
 
				-            print(
			
 
				-                f"当前榜单模式：当天第一次爬取，{total_input_news} 条当前榜单新闻中有 {matched_new_count} 条{filter_status}"
			
 
				-            )
			
 
				-        else:
			
 
				-            matched_count = sum(stat["count"] for stat in word_stats.values())
			
 
				-            filter_status = (
			
 
				-                "全部显示"
			
 
				-                if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻"
			
 
				-                else "频率词匹配"
			
 
				-            )
			
 
				-            print(
			
 
				-                f"当前榜单模式：{total_input_news} 条当前榜单新闻中有 {matched_count} 条{filter_status}"
			
 
				-            )
			
 
				-
			
 
				-    stats = []
			
 
				-    # 创建 group_key 到位置和最大数量的映射
			
 
				-    group_key_to_position = {
			
 
				-        group["group_key"]: idx for idx, group in enumerate(word_groups)
			
 
				-    }
			
 
				-    group_key_to_max_count = {
			
 
				-        group["group_key"]: group.get("max_count", 0) for group in word_groups
			
 
				-    }
			
 
				-
			
 
				-    for group_key, data in word_stats.items():
			
 
				-        all_titles = []
			
 
				-        for source_id, title_list in data["titles"].items():
			
 
				-            all_titles.extend(title_list)
			
 
				-
			
 
				-        # 按权重排序
			
 
				-        sorted_titles = sorted(
			
 
				-            all_titles,
			
 
				-            key=lambda x: (
			
 
				-                -calculate_news_weight(x, rank_threshold),
			
 
				-                min(x["ranks"]) if x["ranks"] else 999,
			
 
				-                -x["count"],
			
 
				-            ),
			
 
				-        )
			
 
				-
			
 
				-        # 应用最大显示数量限制（优先级：单独配置 > 全局配置）
			
 
				-        group_max_count = group_key_to_max_count.get(group_key, 0)
			
 
				-        if group_max_count == 0:
			
 
				-            # 使用全局配置
			
 
				-            group_max_count = CONFIG.get("MAX_NEWS_PER_KEYWORD", 0)
			
 
				-
			
 
				-        if group_max_count > 0:
			
 
				-            sorted_titles = sorted_titles[:group_max_count]
			
 
				-
			
 
				-        stats.append(
			
 
				-            {
			
 
				-                "word": group_key,
			
 
				-                "count": data["count"],
			
 
				-                "position": group_key_to_position.get(group_key, 999),
			
 
				-                "titles": sorted_titles,
			
 
				-                "percentage": (
			
 
				-                    round(data["count"] / total_titles * 100, 2)
			
 
				-                    if total_titles > 0
			
 
				-                    else 0
			
 
				-                ),
			
 
				-            }
			
 
				-        )
			
 
				-
			
 
				-    # 根据配置选择排序优先级
			
 
				-    if CONFIG.get("SORT_BY_POSITION_FIRST", False):
			
 
				-        # 先按配置位置，再按热点条数
			
 
				-        stats.sort(key=lambda x: (x["position"], -x["count"]))
			
 
				-    else:
			
 
				-        # 先按热点条数，再按配置位置（原逻辑）
			
 
				-        stats.sort(key=lambda x: (-x["count"], x["position"]))
			
 
				-
			
 
				-    return stats, total_titles
			
 
				-
			
 
				-
			
 
				-# === 报告生成 ===
			
 
				-def prepare_report_data(
			
 
				-    stats: List[Dict],
			
 
				-    failed_ids: Optional[List] = None,
			
 
				-    new_titles: Optional[Dict] = None,
			
 
				-    id_to_name: Optional[Dict] = None,
			
 
				-    mode: str = "daily",
			
 
				-) -> Dict:
			
 
				-    """准备报告数据"""
			
 
				-    processed_new_titles = []
			
 
				-
			
 
				-    # 在增量模式下隐藏新增新闻区域
			
 
				-    hide_new_section = mode == "incremental"
			
 
				-
			
 
				-    # 只有在非隐藏模式下才处理新增新闻部分
			
 
				-    if not hide_new_section:
			
 
				-        filtered_new_titles = {}
			
 
				-        if new_titles and id_to_name:
			
 
				-            word_groups, filter_words, global_filters = load_frequency_words()
			
 
				-            for source_id, titles_data in new_titles.items():
			
 
				-                filtered_titles = {}
			
 
				-                for title, title_data in titles_data.items():
			
 
				-                    if matches_word_groups(title, word_groups, filter_words, global_filters):
			
 
				-                        filtered_titles[title] = title_data
			
 
				-                if filtered_titles:
			
 
				-                    filtered_new_titles[source_id] = filtered_titles
			
 
				-
			
 
				-        if filtered_new_titles and id_to_name:
			
 
				-            for source_id, titles_data in filtered_new_titles.items():
			
 
				-                source_name = id_to_name.get(source_id, source_id)
			
 
				-                source_titles = []
			
 
				-
			
 
				-                for title, title_data in titles_data.items():
			
 
				-                    url = title_data.get("url", "")
			
 
				-                    mobile_url = title_data.get("mobileUrl", "")
			
 
				-                    ranks = title_data.get("ranks", [])
			
 
				-
			
 
				-                    processed_title = {
			
 
				-                        "title": title,
			
 
				-                        "source_name": source_name,
			
 
				-                        "time_display": "",
			
 
				-                        "count": 1,
			
 
				-                        "ranks": ranks,
			
 
				-                        "rank_threshold": CONFIG["RANK_THRESHOLD"],
			
 
				-                        "url": url,
			
 
				-                        "mobile_url": mobile_url,
			
 
				-                        "is_new": True,
			
 
				-                    }
			
 
				-                    source_titles.append(processed_title)
			
 
				-
			
 
				-                if source_titles:
			
 
				-                    processed_new_titles.append(
			
 
				-                        {
			
 
				-                            "source_id": source_id,
			
 
				-                            "source_name": source_name,
			
 
				-                            "titles": source_titles,
			
 
				-                        }
			
 
				-                    )
			
 
				-
			
 
				-    processed_stats = []
			
 
				-    for stat in stats:
			
 
				-        if stat["count"] <= 0:
			
 
				-            continue
			
 
				-
			
 
				-        processed_titles = []
			
 
				-        for title_data in stat["titles"]:
			
 
				-            processed_title = {
			
 
				-                "title": title_data["title"],
			
 
				-                "source_name": title_data["source_name"],
			
 
				-                "time_display": title_data["time_display"],
			
 
				-                "count": title_data["count"],
			
 
				-                "ranks": title_data["ranks"],
			
 
				-                "rank_threshold": title_data["rank_threshold"],
			
 
				-                "url": title_data.get("url", ""),
			
 
				-                "mobile_url": title_data.get("mobileUrl", ""),
			
 
				-                "is_new": title_data.get("is_new", False),
			
 
				-            }
			
 
				-            processed_titles.append(processed_title)
			
 
				-
			
 
				-        processed_stats.append(
			
 
				-            {
			
 
				-                "word": stat["word"],
			
 
				-                "count": stat["count"],
			
 
				-                "percentage": stat.get("percentage", 0),
			
 
				-                "titles": processed_titles,
			
 
				-            }
			
 
				-        )
			
 
				-
			
 
				-    return {
			
 
				-        "stats": processed_stats,
			
 
				-        "new_titles": processed_new_titles,
			
 
				-        "failed_ids": failed_ids or [],
			
 
				-        "total_new_count": sum(
			
 
				-            len(source["titles"]) for source in processed_new_titles
			
 
				-        ),
			
 
				-    }
			
 
				-
			
 
				-
			
 
				-def format_title_for_platform(
			
 
				-    platform: str, title_data: Dict, show_source: bool = True
			
 
				-) -> str:
			
 
				-    """统一的标题格式化方法"""
			
 
				-    rank_display = format_rank_display(
			
 
				-        title_data["ranks"], title_data["rank_threshold"], platform
			
 
				-    )
			
 
				-
			
 
				-    link_url = title_data["mobile_url"] or title_data["url"]
			
 
				-
			
 
				-    cleaned_title = clean_title(title_data["title"])
			
 
				-
			
 
				-    if platform == "feishu":
			
 
				-        if link_url:
			
 
				-            formatted_title = f"[{cleaned_title}]({link_url})"
			
 
				-        else:
			
 
				-            formatted_title = cleaned_title
			
 
				-
			
 
				-        title_prefix = "🆕 " if title_data.get("is_new") else ""
			
 
				-
			
 
				-        if show_source:
			
 
				-            result = f"<font color='grey'>[{title_data['source_name']}]</font> {title_prefix}{formatted_title}"
			
 
				-        else:
			
 
				-            result = f"{title_prefix}{formatted_title}"
			
 
				-
			
 
				-        if rank_display:
			
 
				-            result += f" {rank_display}"
			
 
				-        if title_data["time_display"]:
			
 
				-            result += f" <font color='grey'>- {title_data['time_display']}</font>"
			
 
				-        if title_data["count"] > 1:
			
 
				-            result += f" <font color='green'>({title_data['count']}次)</font>"
			
 
				-
			
 
				-        return result
			
 
				-
			
 
				-    elif platform == "dingtalk":
			
 
				-        if link_url:
			
 
				-            formatted_title = f"[{cleaned_title}]({link_url})"
			
 
				-        else:
			
 
				-            formatted_title = cleaned_title
			
 
				-
			
 
				-        title_prefix = "🆕 " if title_data.get("is_new") else ""
			
 
				-
			
 
				-        if show_source:
			
 
				-            result = f"[{title_data['source_name']}] {title_prefix}{formatted_title}"
			
 
				-        else:
			
 
				-            result = f"{title_prefix}{formatted_title}"
			
 
				-
			
 
				-        if rank_display:
			
 
				-            result += f" {rank_display}"
			
 
				-        if title_data["time_display"]:
			
 
				-            result += f" - {title_data['time_display']}"
			
 
				-        if title_data["count"] > 1:
			
 
				-            result += f" ({title_data['count']}次)"
			
 
				-
			
 
				-        return result
			
 
				-
			
 
				-    elif platform in ("wework", "bark"):
			
 
				-        # WeWork 和 Bark 使用 markdown 格式
			
 
				-        if link_url:
			
 
				-            formatted_title = f"[{cleaned_title}]({link_url})"
			
 
				-        else:
			
 
				-            formatted_title = cleaned_title
			
 
				-
			
 
				-        title_prefix = "🆕 " if title_data.get("is_new") else ""
			
 
				-
			
 
				-        if show_source:
			
 
				-            result = f"[{title_data['source_name']}] {title_prefix}{formatted_title}"
			
 
				-        else:
			
 
				-            result = f"{title_prefix}{formatted_title}"
			
 
				-
			
 
				-        if rank_display:
			
 
				-            result += f" {rank_display}"
			
 
				-        if title_data["time_display"]:
			
 
				-            result += f" - {title_data['time_display']}"
			
 
				-        if title_data["count"] > 1:
			
 
				-            result += f" ({title_data['count']}次)"
			
 
				-
			
 
				-        return result
			
 
				-
			
 
				-    elif platform == "telegram":
			
 
				-        if link_url:
			
 
				-            formatted_title = f'<a href="{link_url}">{html_escape(cleaned_title)}</a>'
			
 
				-        else:
			
 
				-            formatted_title = cleaned_title
			
 
				-
			
 
				-        title_prefix = "🆕 " if title_data.get("is_new") else ""
			
 
				-
			
 
				-        if show_source:
			
 
				-            result = f"[{title_data['source_name']}] {title_prefix}{formatted_title}"
			
 
				-        else:
			
 
				-            result = f"{title_prefix}{formatted_title}"
			
 
				-
			
 
				-        if rank_display:
			
 
				-            result += f" {rank_display}"
			
 
				-        if title_data["time_display"]:
			
 
				-            result += f" <code>- {title_data['time_display']}</code>"
			
 
				-        if title_data["count"] > 1:
			
 
				-            result += f" <code>({title_data['count']}次)</code>"
			
 
				-
			
 
				-        return result
			
 
				-
			
 
				-    elif platform == "ntfy":
			
 
				-        if link_url:
			
 
				-            formatted_title = f"[{cleaned_title}]({link_url})"
			
 
				-        else:
			
 
				-            formatted_title = cleaned_title
			
 
				-
			
 
				-        title_prefix = "🆕 " if title_data.get("is_new") else ""
			
 
				-
			
 
				-        if show_source:
			
 
				-            result = f"[{title_data['source_name']}] {title_prefix}{formatted_title}"
			
 
				-        else:
			
 
				-            result = f"{title_prefix}{formatted_title}"
			
 
				-
			
 
				-        if rank_display:
			
 
				-            result += f" {rank_display}"
			
 
				-        if title_data["time_display"]:
			
 
				-            result += f" `- {title_data['time_display']}`"
			
 
				-        if title_data["count"] > 1:
			
 
				-            result += f" `({title_data['count']}次)`"
			
 
				-
			
 
				-        return result
			
 
				-
			
 
				-    elif platform == "slack":
			
 
				-        # Slack 使用 mrkdwn 格式
			
 
				-        if link_url:
			
 
				-            # Slack 链接格式: <url|text>
			
 
				-            formatted_title = f"<{link_url}|{cleaned_title}>"
			
 
				-        else:
			
 
				-            formatted_title = cleaned_title
			
 
				-
			
 
				-        title_prefix = "🆕 " if title_data.get("is_new") else ""
			
 
				-
			
 
				-        if show_source:
			
 
				-            result = f"[{title_data['source_name']}] {title_prefix}{formatted_title}"
			
 
				-        else:
			
 
				-            result = f"{title_prefix}{formatted_title}"
			
 
				-
			
 
				-        # 排名（使用 * 加粗）
			
 
				-        rank_display = format_rank_display(
			
 
				-            title_data["ranks"], title_data["rank_threshold"], "slack"
			
 
				-        )
			
 
				-        if rank_display:
			
 
				-            result += f" {rank_display}"
			
 
				-        if title_data["time_display"]:
			
 
				-            result += f" `- {title_data['time_display']}`"
			
 
				-        if title_data["count"] > 1:
			
 
				-            result += f" `({title_data['count']}次)`"
			
 
				-
			
 
				-        return result
			
 
				-
			
 
				-    elif platform == "html":
			
 
				-        rank_display = format_rank_display(
			
 
				-            title_data["ranks"], title_data["rank_threshold"], "html"
			
 
				-        )
			
 
				-
			
 
				-        link_url = title_data["mobile_url"] or title_data["url"]
			
 
				-
			
 
				-        escaped_title = html_escape(cleaned_title)
			
 
				-        escaped_source_name = html_escape(title_data["source_name"])
			
 
				-
			
 
				-        if link_url:
			
 
				-            escaped_url = html_escape(link_url)
			
 
				-            formatted_title = f'[{escaped_source_name}] <a href="{escaped_url}" target="_blank" class="news-link">{escaped_title}</a>'
			
 
				-        else:
			
 
				-            formatted_title = (
			
 
				-                f'[{escaped_source_name}] <span class="no-link">{escaped_title}</span>'
			
 
				-            )
			
 
				-
			
 
				-        if rank_display:
			
 
				-            formatted_title += f" {rank_display}"
			
 
				-        if title_data["time_display"]:
			
 
				-            escaped_time = html_escape(title_data["time_display"])
			
 
				-            formatted_title += f" <font color='grey'>- {escaped_time}</font>"
			
 
				-        if title_data["count"] > 1:
			
 
				-            formatted_title += f" <font color='green'>({title_data['count']}次)</font>"
			
 
				-
			
 
				-        if title_data.get("is_new"):
			
 
				-            formatted_title = f"<div class='new-title'>🆕 {formatted_title}</div>"
			
 
				-
			
 
				-        return formatted_title
			
 
				-
			
 
				-    else:
			
 
				-        return cleaned_title
			
 
				-
			
 
				-
			
 
				-def generate_html_report(
			
 
				-    stats: List[Dict],
			
 
				-    total_titles: int,
			
 
				-    failed_ids: Optional[List] = None,
			
 
				-    new_titles: Optional[Dict] = None,
			
 
				-    id_to_name: Optional[Dict] = None,
			
 
				-    mode: str = "daily",
			
 
				-    is_daily_summary: bool = False,
			
 
				-    update_info: Optional[Dict] = None,
			
 
				-) -> str:
			
 
				-    """生成HTML报告"""
			
 
				-    if is_daily_summary:
			
 
				-        if mode == "current":
			
 
				-            filename = "当前榜单汇总.html"
			
 
				-        elif mode == "incremental":
			
 
				-            filename = "当日增量.html"
			
 
				-        else:
			
 
				-            filename = "当日汇总.html"
			
 
				-    else:
			
 
				-        filename = f"{format_time_filename()}.html"
			
 
				-
			
 
				-    file_path = get_output_path("html", filename)
			
 
				-
			
 
				-    report_data = prepare_report_data(stats, failed_ids, new_titles, id_to_name, mode)
			
 
				-
			
 
				-    html_content = render_html_content(
			
 
				-        report_data, total_titles, is_daily_summary, mode, update_info
			
 
				-    )
			
 
				-
			
 
				-    with open(file_path, "w", encoding="utf-8") as f:
			
 
				-        f.write(html_content)
			
 
				-
			
 
				-    if is_daily_summary:
			
 
				-        # 生成到根目录（供 GitHub Pages 访问）
			
 
				-        root_index_path = Path("index.html")
			
 
				-        with open(root_index_path, "w", encoding="utf-8") as f:
			
 
				-            f.write(html_content)
			
 
				-
			
 
				-        # 同时生成到 output 目录（供 Docker Volume 挂载访问）
			
 
				-        output_index_path = Path("output") / "index.html"
			
 
				-        ensure_directory_exists("output")
			
 
				-        with open(output_index_path, "w", encoding="utf-8") as f:
			
 
				-            f.write(html_content)
			
 
				-
			
 
				-    return file_path
			
 
				-
			
 
				-
			
 
				-def render_html_content(
			
 
				-    report_data: Dict,
			
 
				-    total_titles: int,
			
 
				-    is_daily_summary: bool = False,
			
 
				-    mode: str = "daily",
			
 
				-    update_info: Optional[Dict] = None,
			
 
				-) -> str:
			
 
				-    """渲染HTML内容"""
			
 
				-    html = """
			
 
				-    <!DOCTYPE html>
			
 
				-    <html>
			
 
				-    <head>
			
 
				-        <meta charset="UTF-8">
			
 
				-        <meta name="viewport" content="width=device-width, initial-scale=1.0">
			
 
				-        <title>热点新闻分析</title>
			
 
				-        <script src="https://cdnjs.cloudflare.com/ajax/libs/html2canvas/1.4.1/html2canvas.min.js" integrity="sha512-BNaRQnYJYiPSqHHDb58B0yaPfCu+Wgds8Gp/gU33kqBtgNS4tSPHuGibyoeqMV/TJlSKda6FXzoEyYGjTe+vXA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
			
 
				-        <style>
			
 
				-            * { box-sizing: border-box; }
			
 
				-            body { 
			
 
				-                font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
			
 
				-                margin: 0; 
			
 
				-                padding: 16px; 
			
 
				-                background: #fafafa;
			
 
				-                color: #333;
			
 
				-                line-height: 1.5;
			
 
				-            }
			
 
				-            
			
 
				-            .container {
			
 
				-                max-width: 600px;
			
 
				-                margin: 0 auto;
			
 
				-                background: white;
			
 
				-                border-radius: 12px;
			
 
				-                overflow: hidden;
			
 
				-                box-shadow: 0 2px 16px rgba(0,0,0,0.06);
			
 
				-            }
			
 
				-            
			
 
				-            .header {
			
 
				-                background: linear-gradient(135deg, #4f46e5 0%, #7c3aed 100%);
			
 
				-                color: white;
			
 
				-                padding: 32px 24px;
			
 
				-                text-align: center;
			
 
				-                position: relative;
			
 
				-            }
			
 
				-            
			
 
				-            .save-buttons {
			
 
				-                position: absolute;
			
 
				-                top: 16px;
			
 
				-                right: 16px;
			
 
				-                display: flex;
			
 
				-                gap: 8px;
			
 
				-            }
			
 
				-            
			
 
				-            .save-btn {
			
 
				-                background: rgba(255, 255, 255, 0.2);
			
 
				-                border: 1px solid rgba(255, 255, 255, 0.3);
			
 
				-                color: white;
			
 
				-                padding: 8px 16px;
			
 
				-                border-radius: 6px;
			
 
				-                cursor: pointer;
			
 
				-                font-size: 13px;
			
 
				-                font-weight: 500;
			
 
				-                transition: all 0.2s ease;
			
 
				-                backdrop-filter: blur(10px);
			
 
				-                white-space: nowrap;
			
 
				-            }
			
 
				-            
			
 
				-            .save-btn:hover {
			
 
				-                background: rgba(255, 255, 255, 0.3);
			
 
				-                border-color: rgba(255, 255, 255, 0.5);
			
 
				-                transform: translateY(-1px);
			
 
				-            }
			
 
				-            
			
 
				-            .save-btn:active {
			
 
				-                transform: translateY(0);
			
 
				-            }
			
 
				-            
			
 
				-            .save-btn:disabled {
			
 
				-                opacity: 0.6;
			
 
				-                cursor: not-allowed;
			
 
				-            }
			
 
				-            
			
 
				-            .header-title {
			
 
				-                font-size: 22px;
			
 
				-                font-weight: 700;
			
 
				-                margin: 0 0 20px 0;
			
 
				-            }
			
 
				-            
			
 
				-            .header-info {
			
 
				-                display: grid;
			
 
				-                grid-template-columns: 1fr 1fr;
			
 
				-                gap: 16px;
			
 
				-                font-size: 14px;
			
 
				-                opacity: 0.95;
			
 
				-            }
			
 
				-            
			
 
				-            .info-item {
			
 
				-                text-align: center;
			
 
				-            }
			
 
				-            
			
 
				-            .info-label {
			
 
				-                display: block;
			
 
				-                font-size: 12px;
			
 
				-                opacity: 0.8;
			
 
				-                margin-bottom: 4px;
			
 
				-            }
			
 
				-            
			
 
				-            .info-value {
			
 
				-                font-weight: 600;
			
 
				-                font-size: 16px;
			
 
				-            }
			
 
				-            
			
 
				-            .content {
			
 
				-                padding: 24px;
			
 
				-            }
			
 
				-            
			
 
				-            .word-group {
			
 
				-                margin-bottom: 40px;
			
 
				-            }
			
 
				-            
			
 
				-            .word-group:first-child {
			
 
				-                margin-top: 0;
			
 
				-            }
			
 
				-            
			
 
				-            .word-header {
			
 
				-                display: flex;
			
 
				-                align-items: center;
			
 
				-                justify-content: space-between;
			
 
				-                margin-bottom: 20px;
			
 
				-                padding-bottom: 8px;
			
 
				-                border-bottom: 1px solid #f0f0f0;
			
 
				-            }
			
 
				-            
			
 
				-            .word-info {
			
 
				-                display: flex;
			
 
				-                align-items: center;
			
 
				-                gap: 12px;
			
 
				-            }
			
 
				-            
			
 
				-            .word-name {
			
 
				-                font-size: 17px;
			
 
				-                font-weight: 600;
			
 
				-                color: #1a1a1a;
			
 
				-            }
			
 
				-            
			
 
				-            .word-count {
			
 
				-                color: #666;
			
 
				-                font-size: 13px;
			
 
				-                font-weight: 500;
			
 
				-            }
			
 
				-            
			
 
				-            .word-count.hot { color: #dc2626; font-weight: 600; }
			
 
				-            .word-count.warm { color: #ea580c; font-weight: 600; }
			
 
				-            
			
 
				-            .word-index {
			
 
				-                color: #999;
			
 
				-                font-size: 12px;
			
 
				-            }
			
 
				-            
			
 
				-            .news-item {
			
 
				-                margin-bottom: 20px;
			
 
				-                padding: 16px 0;
			
 
				-                border-bottom: 1px solid #f5f5f5;
			
 
				-                position: relative;
			
 
				-                display: flex;
			
 
				-                gap: 12px;
			
 
				-                align-items: center;
			
 
				-            }
			
 
				-            
			
 
				-            .news-item:last-child {
			
 
				-                border-bottom: none;
			
 
				-            }
			
 
				-            
			
 
				-            .news-item.new::after {
			
 
				-                content: "NEW";
			
 
				-                position: absolute;
			
 
				-                top: 12px;
			
 
				-                right: 0;
			
 
				-                background: #fbbf24;
			
 
				-                color: #92400e;
			
 
				-                font-size: 9px;
			
 
				-                font-weight: 700;
			
 
				-                padding: 3px 6px;
			
 
				-                border-radius: 4px;
			
 
				-                letter-spacing: 0.5px;
			
 
				-            }
			
 
				-            
			
 
				-            .news-number {
			
 
				-                color: #999;
			
 
				-                font-size: 13px;
			
 
				-                font-weight: 600;
			
 
				-                min-width: 20px;
			
 
				-                text-align: center;
			
 
				-                flex-shrink: 0;
			
 
				-                background: #f8f9fa;
			
 
				-                border-radius: 50%;
			
 
				-                width: 24px;
			
 
				-                height: 24px;
			
 
				-                display: flex;
			
 
				-                align-items: center;
			
 
				-                justify-content: center;
			
 
				-                align-self: flex-start;
			
 
				-                margin-top: 8px;
			
 
				-            }
			
 
				-            
			
 
				-            .news-content {
			
 
				-                flex: 1;
			
 
				-                min-width: 0;
			
 
				-                padding-right: 40px;
			
 
				-            }
			
 
				-            
			
 
				-            .news-item.new .news-content {
			
 
				-                padding-right: 50px;
			
 
				-            }
			
 
				-            
			
 
				-            .news-header {
			
 
				-                display: flex;
			
 
				-                align-items: center;
			
 
				-                gap: 8px;
			
 
				-                margin-bottom: 8px;
			
 
				-                flex-wrap: wrap;
			
 
				-            }
			
 
				-            
			
 
				-            .source-name {
			
 
				-                color: #666;
			
 
				-                font-size: 12px;
			
 
				-                font-weight: 500;
			
 
				-            }
			
 
				-            
			
 
				-            .rank-num {
			
 
				-                color: #fff;
			
 
				-                background: #6b7280;
			
 
				-                font-size: 10px;
			
 
				-                font-weight: 700;
			
 
				-                padding: 2px 6px;
			
 
				-                border-radius: 10px;
			
 
				-                min-width: 18px;
			
 
				-                text-align: center;
			
 
				-            }
			
 
				-            
			
 
				-            .rank-num.top { background: #dc2626; }
			
 
				-            .rank-num.high { background: #ea580c; }
			
 
				-            
			
 
				-            .time-info {
			
 
				-                color: #999;
			
 
				-                font-size: 11px;
			
 
				-            }
			
 
				-            
			
 
				-            .count-info {
			
 
				-                color: #059669;
			
 
				-                font-size: 11px;
			
 
				-                font-weight: 500;
			
 
				-            }
			
 
				-            
			
 
				-            .news-title {
			
 
				-                font-size: 15px;
			
 
				-                line-height: 1.4;
			
 
				-                color: #1a1a1a;
			
 
				-                margin: 0;
			
 
				-            }
			
 
				-            
			
 
				-            .news-link {
			
 
				-                color: #2563eb;
			
 
				-                text-decoration: none;
			
 
				-            }
			
 
				-            
			
 
				-            .news-link:hover {
			
 
				-                text-decoration: underline;
			
 
				-            }
			
 
				-            
			
 
				-            .news-link:visited {
			
 
				-                color: #7c3aed;
			
 
				-            }
			
 
				-            
			
 
				-            .new-section {
			
 
				-                margin-top: 40px;
			
 
				-                padding-top: 24px;
			
 
				-                border-top: 2px solid #f0f0f0;
			
 
				-            }
			
 
				-            
			
 
				-            .new-section-title {
			
 
				-                color: #1a1a1a;
			
 
				-                font-size: 16px;
			
 
				-                font-weight: 600;
			
 
				-                margin: 0 0 20px 0;
			
 
				-            }
			
 
				-            
			
 
				-            .new-source-group {
			
 
				-                margin-bottom: 24px;
			
 
				-            }
			
 
				-            
			
 
				-            .new-source-title {
			
 
				-                color: #666;
			
 
				-                font-size: 13px;
			
 
				-                font-weight: 500;
			
 
				-                margin: 0 0 12px 0;
			
 
				-                padding-bottom: 6px;
			
 
				-                border-bottom: 1px solid #f5f5f5;
			
 
				-            }
			
 
				-            
			
 
				-            .new-item {
			
 
				-                display: flex;
			
 
				-                align-items: center;
			
 
				-                gap: 12px;
			
 
				-                padding: 8px 0;
			
 
				-                border-bottom: 1px solid #f9f9f9;
			
 
				-            }
			
 
				-            
			
 
				-            .new-item:last-child {
			
 
				-                border-bottom: none;
			
 
				-            }
			
 
				-            
			
 
				-            .new-item-number {
			
 
				-                color: #999;
			
 
				-                font-size: 12px;
			
 
				-                font-weight: 600;
			
 
				-                min-width: 18px;
			
 
				-                text-align: center;
			
 
				-                flex-shrink: 0;
			
 
				-                background: #f8f9fa;
			
 
				-                border-radius: 50%;
			
 
				-                width: 20px;
			
 
				-                height: 20px;
			
 
				-                display: flex;
			
 
				-                align-items: center;
			
 
				-                justify-content: center;
			
 
				-            }
			
 
				-            
			
 
				-            .new-item-rank {
			
 
				-                color: #fff;
			
 
				-                background: #6b7280;
			
 
				-                font-size: 10px;
			
 
				-                font-weight: 700;
			
 
				-                padding: 3px 6px;
			
 
				-                border-radius: 8px;
			
 
				-                min-width: 20px;
			
 
				-                text-align: center;
			
 
				-                flex-shrink: 0;
			
 
				-            }
			
 
				-            
			
 
				-            .new-item-rank.top { background: #dc2626; }
			
 
				-            .new-item-rank.high { background: #ea580c; }
			
 
				-            
			
 
				-            .new-item-content {
			
 
				-                flex: 1;
			
 
				-                min-width: 0;
			
 
				-            }
			
 
				-            
			
 
				-            .new-item-title {
			
 
				-                font-size: 14px;
			
 
				-                line-height: 1.4;
			
 
				-                color: #1a1a1a;
			
 
				-                margin: 0;
			
 
				-            }
			
 
				-            
			
 
				-            .error-section {
			
 
				-                background: #fef2f2;
			
 
				-                border: 1px solid #fecaca;
			
 
				-                border-radius: 8px;
			
 
				-                padding: 16px;
			
 
				-                margin-bottom: 24px;
			
 
				-            }
			
 
				-            
			
 
				-            .error-title {
			
 
				-                color: #dc2626;
			
 
				-                font-size: 14px;
			
 
				-                font-weight: 600;
			
 
				-                margin: 0 0 8px 0;
			
 
				-            }
			
 
				-            
			
 
				-            .error-list {
			
 
				-                list-style: none;
			
 
				-                padding: 0;
			
 
				-                margin: 0;
			
 
				-            }
			
 
				-            
			
 
				-            .error-item {
			
 
				-                color: #991b1b;
			
 
				-                font-size: 13px;
			
 
				-                padding: 2px 0;
			
 
				-                font-family: 'SF Mono', Consolas, monospace;
			
 
				-            }
			
 
				-            
			
 
				-            .footer {
			
 
				-                margin-top: 32px;
			
 
				-                padding: 20px 24px;
			
 
				-                background: #f8f9fa;
			
 
				-                border-top: 1px solid #e5e7eb;
			
 
				-                text-align: center;
			
 
				-            }
			
 
				-            
			
 
				-            .footer-content {
			
 
				-                font-size: 13px;
			
 
				-                color: #6b7280;
			
 
				-                line-height: 1.6;
			
 
				-            }
			
 
				-            
			
 
				-            .footer-link {
			
 
				-                color: #4f46e5;
			
 
				-                text-decoration: none;
			
 
				-                font-weight: 500;
			
 
				-                transition: color 0.2s ease;
			
 
				-            }
			
 
				-            
			
 
				-            .footer-link:hover {
			
 
				-                color: #7c3aed;
			
 
				-                text-decoration: underline;
			
 
				-            }
			
 
				-            
			
 
				-            .project-name {
			
 
				-                font-weight: 600;
			
 
				-                color: #374151;
			
 
				-            }
			
 
				-            
			
 
				-            @media (max-width: 480px) {
			
 
				-                body { padding: 12px; }
			
 
				-                .header { padding: 24px 20px; }
			
 
				-                .content { padding: 20px; }
			
 
				-                .footer { padding: 16px 20px; }
			
 
				-                .header-info { grid-template-columns: 1fr; gap: 12px; }
			
 
				-                .news-header { gap: 6px; }
			
 
				-                .news-content { padding-right: 45px; }
			
 
				-                .news-item { gap: 8px; }
			
 
				-                .new-item { gap: 8px; }
			
 
				-                .news-number { width: 20px; height: 20px; font-size: 12px; }
			
 
				-                .save-buttons {
			
 
				-                    position: static;
			
 
				-                    margin-bottom: 16px;
			
 
				-                    display: flex;
			
 
				-                    gap: 8px;
			
 
				-                    justify-content: center;
			
 
				-                    flex-direction: column;
			
 
				-                    width: 100%;
			
 
				-                }
			
 
				-                .save-btn {
			
 
				-                    width: 100%;
			
 
				-                }
			
 
				-            }
			
 
				-        </style>
			
 
				-    </head>
			
 
				-    <body>
			
 
				-        <div class="container">
			
 
				-            <div class="header">
			
 
				-                <div class="save-buttons">
			
 
				-                    <button class="save-btn" onclick="saveAsImage()">保存为图片</button>
			
 
				-                    <button class="save-btn" onclick="saveAsMultipleImages()">分段保存</button>
			
 
				-                </div>
			
 
				-                <div class="header-title">热点新闻分析</div>
			
 
				-                <div class="header-info">
			
 
				-                    <div class="info-item">
			
 
				-                        <span class="info-label">报告类型</span>
			
 
				-                        <span class="info-value">"""
			
 
				-
			
 
				-    # 处理报告类型显示
			
 
				-    if is_daily_summary:
			
 
				-        if mode == "current":
			
 
				-            html += "当前榜单"
			
 
				-        elif mode == "incremental":
			
 
				-            html += "增量模式"
			
 
				-        else:
			
 
				-            html += "当日汇总"
			
 
				-    else:
			
 
				-        html += "实时分析"
			
 
				-
			
 
				-    html += """</span>
			
 
				-                    </div>
			
 
				-                    <div class="info-item">
			
 
				-                        <span class="info-label">新闻总数</span>
			
 
				-                        <span class="info-value">"""
			
 
				-
			
 
				-    html += f"{total_titles} 条"
			
 
				-
			
 
				-    # 计算筛选后的热点新闻数量
			
 
				-    hot_news_count = sum(len(stat["titles"]) for stat in report_data["stats"])
			
 
				-
			
 
				-    html += """</span>
			
 
				-                    </div>
			
 
				-                    <div class="info-item">
			
 
				-                        <span class="info-label">热点新闻</span>
			
 
				-                        <span class="info-value">"""
			
 
				-
			
 
				-    html += f"{hot_news_count} 条"
			
 
				-
			
 
				-    html += """</span>
			
 
				-                    </div>
			
 
				-                    <div class="info-item">
			
 
				-                        <span class="info-label">生成时间</span>
			
 
				-                        <span class="info-value">"""
			
 
				-
			
 
				-    now = get_beijing_time()
			
 
				-    html += now.strftime("%m-%d %H:%M")
			
 
				-
			
 
				-    html += """</span>
			
 
				-                    </div>
			
 
				-                </div>
			
 
				-            </div>
			
 
				-            
			
 
				-            <div class="content">"""
			
 
				-
			
 
				-    # 处理失败ID错误信息
			
 
				-    if report_data["failed_ids"]:
			
 
				-        html += """
			
 
				-                <div class="error-section">
			
 
				-                    <div class="error-title">⚠️ 请求失败的平台</div>
			
 
				-                    <ul class="error-list">"""
			
 
				-        for id_value in report_data["failed_ids"]:
			
 
				-            html += f'<li class="error-item">{html_escape(id_value)}</li>'
			
 
				-        html += """
			
 
				-                    </ul>
			
 
				-                </div>"""
			
 
				-
			
 
				-    # 生成热点词汇统计部分的HTML
			
 
				-    stats_html = ""
			
 
				-    if report_data["stats"]:
			
 
				-        total_count = len(report_data["stats"])
			
 
				-
			
 
				-        for i, stat in enumerate(report_data["stats"], 1):
			
 
				-            count = stat["count"]
			
 
				-
			
 
				-            # 确定热度等级
			
 
				-            if count >= 10:
			
 
				-                count_class = "hot"
			
 
				-            elif count >= 5:
			
 
				-                count_class = "warm"
			
 
				-            else:
			
 
				-                count_class = ""
			
 
				-
			
 
				-            escaped_word = html_escape(stat["word"])
			
 
				-
			
 
				-            stats_html += f"""
			
 
				-                <div class="word-group">
			
 
				-                    <div class="word-header">
			
 
				-                        <div class="word-info">
			
 
				-                            <div class="word-name">{escaped_word}</div>
			
 
				-                            <div class="word-count {count_class}">{count} 条</div>
			
 
				-                        </div>
			
 
				-                        <div class="word-index">{i}/{total_count}</div>
			
 
				-                    </div>"""
			
 
				-
			
 
				-            # 处理每个词组下的新闻标题，给每条新闻标上序号
			
 
				-            for j, title_data in enumerate(stat["titles"], 1):
			
 
				-                is_new = title_data.get("is_new", False)
			
 
				-                new_class = "new" if is_new else ""
			
 
				-
			
 
				-                stats_html += f"""
			
 
				-                    <div class="news-item {new_class}">
			
 
				-                        <div class="news-number">{j}</div>
			
 
				-                        <div class="news-content">
			
 
				-                            <div class="news-header">
			
 
				-                                <span class="source-name">{html_escape(title_data["source_name"])}</span>"""
			
 
				-
			
 
				-                # 处理排名显示
			
 
				-                ranks = title_data.get("ranks", [])
			
 
				-                if ranks:
			
 
				-                    min_rank = min(ranks)
			
 
				-                    max_rank = max(ranks)
			
 
				-                    rank_threshold = title_data.get("rank_threshold", 10)
			
 
				-
			
 
				-                    # 确定排名等级
			
 
				-                    if min_rank <= 3:
			
 
				-                        rank_class = "top"
			
 
				-                    elif min_rank <= rank_threshold:
			
 
				-                        rank_class = "high"
			
 
				-                    else:
			
 
				-                        rank_class = ""
			
 
				-
			
 
				-                    if min_rank == max_rank:
			
 
				-                        rank_text = str(min_rank)
			
 
				-                    else:
			
 
				-                        rank_text = f"{min_rank}-{max_rank}"
			
 
				-
			
 
				-                    stats_html += f'<span class="rank-num {rank_class}">{rank_text}</span>'
			
 
				-
			
 
				-                # 处理时间显示
			
 
				-                time_display = title_data.get("time_display", "")
			
 
				-                if time_display:
			
 
				-                    # 简化时间显示格式，将波浪线替换为~
			
 
				-                    simplified_time = (
			
 
				-                        time_display.replace(" ~ ", "~")
			
 
				-                        .replace("[", "")
			
 
				-                        .replace("]", "")
			
 
				-                    )
			
 
				-                    stats_html += (
			
 
				-                        f'<span class="time-info">{html_escape(simplified_time)}</span>'
			
 
				-                    )
			
 
				-
			
 
				-                # 处理出现次数
			
 
				-                count_info = title_data.get("count", 1)
			
 
				-                if count_info > 1:
			
 
				-                    stats_html += f'<span class="count-info">{count_info}次</span>'
			
 
				-
			
 
				-                stats_html += """
			
 
				-                            </div>
			
 
				-                            <div class="news-title">"""
			
 
				-
			
 
				-                # 处理标题和链接
			
 
				-                escaped_title = html_escape(title_data["title"])
			
 
				-                link_url = title_data.get("mobile_url") or title_data.get("url", "")
			
 
				-
			
 
				-                if link_url:
			
 
				-                    escaped_url = html_escape(link_url)
			
 
				-                    stats_html += f'<a href="{escaped_url}" target="_blank" class="news-link">{escaped_title}</a>'
			
 
				-                else:
			
 
				-                    stats_html += escaped_title
			
 
				-
			
 
				-                stats_html += """
			
 
				-                            </div>
			
 
				-                        </div>
			
 
				-                    </div>"""
			
 
				-
			
 
				-            stats_html += """
			
 
				-                </div>"""
			
 
				-
			
 
				-    # 生成新增新闻区域的HTML
			
 
				-    new_titles_html = ""
			
 
				-    if report_data["new_titles"]:
			
 
				-        new_titles_html += f"""
			
 
				-                <div class="new-section">
			
 
				-                    <div class="new-section-title">本次新增热点 (共 {report_data['total_new_count']} 条)</div>"""
			
 
				-
			
 
				-        for source_data in report_data["new_titles"]:
			
 
				-            escaped_source = html_escape(source_data["source_name"])
			
 
				-            titles_count = len(source_data["titles"])
			
 
				-
			
 
				-            new_titles_html += f"""
			
 
				-                    <div class="new-source-group">
			
 
				-                        <div class="new-source-title">{escaped_source} · {titles_count}条</div>"""
			
 
				-
			
 
				-            # 为新增新闻也添加序号
			
 
				-            for idx, title_data in enumerate(source_data["titles"], 1):
			
 
				-                ranks = title_data.get("ranks", [])
			
 
				-
			
 
				-                # 处理新增新闻的排名显示
			
 
				-                rank_class = ""
			
 
				-                if ranks:
			
 
				-                    min_rank = min(ranks)
			
 
				-                    if min_rank <= 3:
			
 
				-                        rank_class = "top"
			
 
				-                    elif min_rank <= title_data.get("rank_threshold", 10):
			
 
				-                        rank_class = "high"
			
 
				-
			
 
				-                    if len(ranks) == 1:
			
 
				-                        rank_text = str(ranks[0])
			
 
				-                    else:
			
 
				-                        rank_text = f"{min(ranks)}-{max(ranks)}"
			
 
				-                else:
			
 
				-                    rank_text = "?"
			
 
				-
			
 
				-                new_titles_html += f"""
			
 
				-                        <div class="new-item">
			
 
				-                            <div class="new-item-number">{idx}</div>
			
 
				-                            <div class="new-item-rank {rank_class}">{rank_text}</div>
			
 
				-                            <div class="new-item-content">
			
 
				-                                <div class="new-item-title">"""
			
 
				-
			
 
				-                # 处理新增新闻的链接
			
 
				-                escaped_title = html_escape(title_data["title"])
			
 
				-                link_url = title_data.get("mobile_url") or title_data.get("url", "")
			
 
				-
			
 
				-                if link_url:
			
 
				-                    escaped_url = html_escape(link_url)
			
 
				-                    new_titles_html += f'<a href="{escaped_url}" target="_blank" class="news-link">{escaped_title}</a>'
			
 
				-                else:
			
 
				-                    new_titles_html += escaped_title
			
 
				-
			
 
				-                new_titles_html += """
			
 
				-                                </div>
			
 
				-                            </div>
			
 
				-                        </div>"""
			
 
				-
			
 
				-            new_titles_html += """
			
 
				-                    </div>"""
			
 
				-
			
 
				-        new_titles_html += """
			
 
				-                </div>"""
			
 
				-
			
 
				-    # 根据配置决定内容顺序
			
 
				-    if CONFIG.get("REVERSE_CONTENT_ORDER", False):
			
 
				-        # 新增热点在前，热点词汇统计在后
			
 
				-        html += new_titles_html + stats_html
			
 
				-    else:
			
 
				-        # 默认：热点词汇统计在前，新增热点在后
			
 
				-        html += stats_html + new_titles_html
			
 
				-
			
 
				-    html += """
			
 
				-            </div>
			
 
				-            
			
 
				-            <div class="footer">
			
 
				-                <div class="footer-content">
			
 
				-                    由 <span class="project-name">TrendRadar</span> 生成 · 
			
 
				-                    <a href="https://github.com/sansan0/TrendRadar" target="_blank" class="footer-link">
			
 
				-                        GitHub 开源项目
			
 
				-                    </a>"""
			
 
				-
			
 
				-    if update_info:
			
 
				-        html += f"""
			
 
				-                    <br>
			
 
				-                    <span style="color: #ea580c; font-weight: 500;">
			
 
				-                        发现新版本 {update_info['remote_version']}，当前版本 {update_info['current_version']}
			
 
				-                    </span>"""
			
 
				-
			
 
				-    html += """
			
 
				-                </div>
			
 
				-            </div>
			
 
				-        </div>
			
 
				-        
			
 
				-        <script>
			
 
				-            async function saveAsImage() {
			
 
				-                const button = event.target;
			
 
				-                const originalText = button.textContent;
			
 
				-                
			
 
				-                try {
			
 
				-                    button.textContent = '生成中...';
			
 
				-                    button.disabled = true;
			
 
				-                    window.scrollTo(0, 0);
			
 
				-                    
			
 
				-                    // 等待页面稳定
			
 
				-                    await new Promise(resolve => setTimeout(resolve, 200));
			
 
				-                    
			
 
				-                    // 截图前隐藏按钮
			
 
				-                    const buttons = document.querySelector('.save-buttons');
			
 
				-                    buttons.style.visibility = 'hidden';
			
 
				-                    
			
 
				-                    // 再次等待确保按钮完全隐藏
			
 
				-                    await new Promise(resolve => setTimeout(resolve, 100));
			
 
				-                    
			
 
				-                    const container = document.querySelector('.container');
			
 
				-                    
			
 
				-                    const canvas = await html2canvas(container, {
			
 
				-                        backgroundColor: '#ffffff',
			
 
				-                        scale: 1.5,
			
 
				-                        useCORS: true,
			
 
				-                        allowTaint: false,
			
 
				-                        imageTimeout: 10000,
			
 
				-                        removeContainer: false,
			
 
				-                        foreignObjectRendering: false,
			
 
				-                        logging: false,
			
 
				-                        width: container.offsetWidth,
			
 
				-                        height: container.offsetHeight,
			
 
				-                        x: 0,
			
 
				-                        y: 0,
			
 
				-                        scrollX: 0,
			
 
				-                        scrollY: 0,
			
 
				-                        windowWidth: window.innerWidth,
			
 
				-                        windowHeight: window.innerHeight
			
 
				-                    });
			
 
				-                    
			
 
				-                    buttons.style.visibility = 'visible';
			
 
				-                    
			
 
				-                    const link = document.createElement('a');
			
 
				-                    const now = new Date();
			
 
				-                    const filename = `TrendRadar_热点新闻分析_${now.getFullYear()}${String(now.getMonth() + 1).padStart(2, '0')}${String(now.getDate()).padStart(2, '0')}_${String(now.getHours()).padStart(2, '0')}${String(now.getMinutes()).padStart(2, '0')}.png`;
			
 
				-                    
			
 
				-                    link.download = filename;
			
 
				-                    link.href = canvas.toDataURL('image/png', 1.0);
			
 
				-                    
			
 
				-                    // 触发下载
			
 
				-                    document.body.appendChild(link);
			
 
				-                    link.click();
			
 
				-                    document.body.removeChild(link);
			
 
				-                    
			
 
				-                    button.textContent = '保存成功!';
			
 
				-                    setTimeout(() => {
			
 
				-                        button.textContent = originalText;
			
 
				-                        button.disabled = false;
			
 
				-                    }, 2000);
			
 
				-                    
			
 
				-                } catch (error) {
			
 
				-                    const buttons = document.querySelector('.save-buttons');
			
 
				-                    buttons.style.visibility = 'visible';
			
 
				-                    button.textContent = '保存失败';
			
 
				-                    setTimeout(() => {
			
 
				-                        button.textContent = originalText;
			
 
				-                        button.disabled = false;
			
 
				-                    }, 2000);
			
 
				-                }
			
 
				-            }
			
 
				-            
			
 
				-            async function saveAsMultipleImages() {
			
 
				-                const button = event.target;
			
 
				-                const originalText = button.textContent;
			
 
				-                const container = document.querySelector('.container');
			
 
				-                const scale = 1.5; 
			
 
				-                const maxHeight = 5000 / scale;
			
 
				-                
			
 
				-                try {
			
 
				-                    button.textContent = '分析中...';
			
 
				-                    button.disabled = true;
			
 
				-                    
			
 
				-                    // 获取所有可能的分割元素
			
 
				-                    const newsItems = Array.from(container.querySelectorAll('.news-item'));
			
 
				-                    const wordGroups = Array.from(container.querySelectorAll('.word-group'));
			
 
				-                    const newSection = container.querySelector('.new-section');
			
 
				-                    const errorSection = container.querySelector('.error-section');
			
 
				-                    const header = container.querySelector('.header');
			
 
				-                    const footer = container.querySelector('.footer');
			
 
				-                    
			
 
				-                    // 计算元素位置和高度
			
 
				-                    const containerRect = container.getBoundingClientRect();
			
 
				-                    const elements = [];
			
 
				-                    
			
 
				-                    // 添加header作为必须包含的元素
			
 
				-                    elements.push({
			
 
				-                        type: 'header',
			
 
				-                        element: header,
			
 
				-                        top: 0,
			
 
				-                        bottom: header.offsetHeight,
			
 
				-                        height: header.offsetHeight
			
 
				-                    });
			
 
				-                    
			
 
				-                    // 添加错误信息（如果存在）
			
 
				-                    if (errorSection) {
			
 
				-                        const rect = errorSection.getBoundingClientRect();
			
 
				-                        elements.push({
			
 
				-                            type: 'error',
			
 
				-                            element: errorSection,
			
 
				-                            top: rect.top - containerRect.top,
			
 
				-                            bottom: rect.bottom - containerRect.top,
			
 
				-                            height: rect.height
			
 
				-                        });
			
 
				-                    }
			
 
				-                    
			
 
				-                    // 按word-group分组处理news-item
			
 
				-                    wordGroups.forEach(group => {
			
 
				-                        const groupRect = group.getBoundingClientRect();
			
 
				-                        const groupNewsItems = group.querySelectorAll('.news-item');
			
 
				-                        
			
 
				-                        // 添加word-group的header部分
			
 
				-                        const wordHeader = group.querySelector('.word-header');
			
 
				-                        if (wordHeader) {
			
 
				-                            const headerRect = wordHeader.getBoundingClientRect();
			
 
				-                            elements.push({
			
 
				-                                type: 'word-header',
			
 
				-                                element: wordHeader,
			
 
				-                                parent: group,
			
 
				-                                top: groupRect.top - containerRect.top,
			
 
				-                                bottom: headerRect.bottom - containerRect.top,
			
 
				-                                height: headerRect.height
			
 
				-                            });
			
 
				-                        }
			
 
				-                        
			
 
				-                        // 添加每个news-item
			
 
				-                        groupNewsItems.forEach(item => {
			
 
				-                            const rect = item.getBoundingClientRect();
			
 
				-                            elements.push({
			
 
				-                                type: 'news-item',
			
 
				-                                element: item,
			
 
				-                                parent: group,
			
 
				-                                top: rect.top - containerRect.top,
			
 
				-                                bottom: rect.bottom - containerRect.top,
			
 
				-                                height: rect.height
			
 
				-                            });
			
 
				-                        });
			
 
				-                    });
			
 
				-                    
			
 
				-                    // 添加新增新闻部分
			
 
				-                    if (newSection) {
			
 
				-                        const rect = newSection.getBoundingClientRect();
			
 
				-                        elements.push({
			
 
				-                            type: 'new-section',
			
 
				-                            element: newSection,
			
 
				-                            top: rect.top - containerRect.top,
			
 
				-                            bottom: rect.bottom - containerRect.top,
			
 
				-                            height: rect.height
			
 
				-                        });
			
 
				-                    }
			
 
				-                    
			
 
				-                    // 添加footer
			
 
				-                    const footerRect = footer.getBoundingClientRect();
			
 
				-                    elements.push({
			
 
				-                        type: 'footer',
			
 
				-                        element: footer,
			
 
				-                        top: footerRect.top - containerRect.top,
			
 
				-                        bottom: footerRect.bottom - containerRect.top,
			
 
				-                        height: footer.offsetHeight
			
 
				-                    });
			
 
				-                    
			
 
				-                    // 计算分割点
			
 
				-                    const segments = [];
			
 
				-                    let currentSegment = { start: 0, end: 0, height: 0, includeHeader: true };
			
 
				-                    let headerHeight = header.offsetHeight;
			
 
				-                    currentSegment.height = headerHeight;
			
 
				-                    
			
 
				-                    for (let i = 1; i < elements.length; i++) {
			
 
				-                        const element = elements[i];
			
 
				-                        const potentialHeight = element.bottom - currentSegment.start;
			
 
				-                        
			
 
				-                        // 检查是否需要创建新分段
			
 
				-                        if (potentialHeight > maxHeight && currentSegment.height > headerHeight) {
			
 
				-                            // 在前一个元素结束处分割
			
 
				-                            currentSegment.end = elements[i - 1].bottom;
			
 
				-                            segments.push(currentSegment);
			
 
				-                            
			
 
				-                            // 开始新分段
			
 
				-                            currentSegment = {
			
 
				-                                start: currentSegment.end,
			
 
				-                                end: 0,
			
 
				-                                height: element.bottom - currentSegment.end,
			
 
				-                                includeHeader: false
			
 
				-                            };
			
 
				-                        } else {
			
 
				-                            currentSegment.height = potentialHeight;
			
 
				-                            currentSegment.end = element.bottom;
			
 
				-                        }
			
 
				-                    }
			
 
				-                    
			
 
				-                    // 添加最后一个分段
			
 
				-                    if (currentSegment.height > 0) {
			
 
				-                        currentSegment.end = container.offsetHeight;
			
 
				-                        segments.push(currentSegment);
			
 
				-                    }
			
 
				-                    
			
 
				-                    button.textContent = `生成中 (0/${segments.length})...`;
			
 
				-                    
			
 
				-                    // 隐藏保存按钮
			
 
				-                    const buttons = document.querySelector('.save-buttons');
			
 
				-                    buttons.style.visibility = 'hidden';
			
 
				-                    
			
 
				-                    // 为每个分段生成图片
			
 
				-                    const images = [];
			
 
				-                    for (let i = 0; i < segments.length; i++) {
			
 
				-                        const segment = segments[i];
			
 
				-                        button.textContent = `生成中 (${i + 1}/${segments.length})...`;
			
 
				-                        
			
 
				-                        // 创建临时容器用于截图
			
 
				-                        const tempContainer = document.createElement('div');
			
 
				-                        tempContainer.style.cssText = `
			
 
				-                            position: absolute;
			
 
				-                            left: -9999px;
			
 
				-                            top: 0;
			
 
				-                            width: ${container.offsetWidth}px;
			
 
				-                            background: white;
			
 
				-                        `;
			
 
				-                        tempContainer.className = 'container';
			
 
				-                        
			
 
				-                        // 克隆容器内容
			
 
				-                        const clonedContainer = container.cloneNode(true);
			
 
				-                        
			
 
				-                        // 移除克隆内容中的保存按钮
			
 
				-                        const clonedButtons = clonedContainer.querySelector('.save-buttons');
			
 
				-                        if (clonedButtons) {
			
 
				-                            clonedButtons.style.display = 'none';
			
 
				-                        }
			
 
				-                        
			
 
				-                        tempContainer.appendChild(clonedContainer);
			
 
				-                        document.body.appendChild(tempContainer);
			
 
				-                        
			
 
				-                        // 等待DOM更新
			
 
				-                        await new Promise(resolve => setTimeout(resolve, 100));
			
 
				-                        
			
 
				-                        // 使用html2canvas截取特定区域
			
 
				-                        const canvas = await html2canvas(clonedContainer, {
			
 
				-                            backgroundColor: '#ffffff',
			
 
				-                            scale: scale,
			
 
				-                            useCORS: true,
			
 
				-                            allowTaint: false,
			
 
				-                            imageTimeout: 10000,
			
 
				-                            logging: false,
			
 
				-                            width: container.offsetWidth,
			
 
				-                            height: segment.end - segment.start,
			
 
				-                            x: 0,
			
 
				-                            y: segment.start,
			
 
				-                            windowWidth: window.innerWidth,
			
 
				-                            windowHeight: window.innerHeight
			
 
				-                        });
			
 
				-                        
			
 
				-                        images.push(canvas.toDataURL('image/png', 1.0));
			
 
				-                        
			
 
				-                        // 清理临时容器
			
 
				-                        document.body.removeChild(tempContainer);
			
 
				-                    }
			
 
				-                    
			
 
				-                    // 恢复按钮显示
			
 
				-                    buttons.style.visibility = 'visible';
			
 
				-                    
			
 
				-                    // 下载所有图片
			
 
				-                    const now = new Date();
			
 
				-                    const baseFilename = `TrendRadar_热点新闻分析_${now.getFullYear()}${String(now.getMonth() + 1).padStart(2, '0')}${String(now.getDate()).padStart(2, '0')}_${String(now.getHours()).padStart(2, '0')}${String(now.getMinutes()).padStart(2, '0')}`;
			
 
				-                    
			
 
				-                    for (let i = 0; i < images.length; i++) {
			
 
				-                        const link = document.createElement('a');
			
 
				-                        link.download = `${baseFilename}_part${i + 1}.png`;
			
 
				-                        link.href = images[i];
			
 
				-                        document.body.appendChild(link);
			
 
				-                        link.click();
			
 
				-                        document.body.removeChild(link);
			
 
				-                        
			
 
				-                        // 延迟一下避免浏览器阻止多个下载
			
 
				-                        await new Promise(resolve => setTimeout(resolve, 100));
			
 
				-                    }
			
 
				-                    
			
 
				-                    button.textContent = `已保存 ${segments.length} 张图片!`;
			
 
				-                    setTimeout(() => {
			
 
				-                        button.textContent = originalText;
			
 
				-                        button.disabled = false;
			
 
				-                    }, 2000);
			
 
				-                    
			
 
				-                } catch (error) {
			
 
				-                    console.error('分段保存失败:', error);
			
 
				-                    const buttons = document.querySelector('.save-buttons');
			
 
				-                    buttons.style.visibility = 'visible';
			
 
				-                    button.textContent = '保存失败';
			
 
				-                    setTimeout(() => {
			
 
				-                        button.textContent = originalText;
			
 
				-                        button.disabled = false;
			
 
				-                    }, 2000);
			
 
				-                }
			
 
				-            }
			
 
				-            
			
 
				-            document.addEventListener('DOMContentLoaded', function() {
			
 
				-                window.scrollTo(0, 0);
			
 
				-            });
			
 
				-        </script>
			
 
				-    </body>
			
 
				-    </html>
			
 
				-    """
			
 
				-
			
 
				-    return html
			
 
				-
			
 
				-
			
 
				-def render_feishu_content(
			
 
				-    report_data: Dict, update_info: Optional[Dict] = None, mode: str = "daily"
			
 
				-) -> str:
			
 
				-    """渲染飞书内容"""
			
 
				-    # 生成热点词汇统计部分
			
 
				-    stats_content = ""
			
 
				-    if report_data["stats"]:
			
 
				-        stats_content += f"📊 **热点词汇统计**\n\n"
			
 
				-
			
 
				-        total_count = len(report_data["stats"])
			
 
				-
			
 
				-        for i, stat in enumerate(report_data["stats"]):
			
 
				-            word = stat["word"]
			
 
				-            count = stat["count"]
			
 
				-
			
 
				-            sequence_display = f"<font color='grey'>[{i + 1}/{total_count}]</font>"
			
 
				-
			
 
				-            if count >= 10:
			
 
				-                stats_content += f"🔥 {sequence_display} **{word}** : <font color='red'>{count}</font> 条\n\n"
			
 
				-            elif count >= 5:
			
 
				-                stats_content += f"📈 {sequence_display} **{word}** : <font color='orange'>{count}</font> 条\n\n"
			
 
				-            else:
			
 
				-                stats_content += f"📌 {sequence_display} **{word}** : {count} 条\n\n"
			
 
				-
			
 
				-            for j, title_data in enumerate(stat["titles"], 1):
			
 
				-                formatted_title = format_title_for_platform(
			
 
				-                    "feishu", title_data, show_source=True
			
 
				-                )
			
 
				-                stats_content += f"  {j}. {formatted_title}\n"
			
 
				-
			
 
				-                if j < len(stat["titles"]):
			
 
				-                    stats_content += "\n"
			
 
				-
			
 
				-            if i < len(report_data["stats"]) - 1:
			
 
				-                stats_content += f"\n{CONFIG['FEISHU_MESSAGE_SEPARATOR']}\n\n"
			
 
				-
			
 
				-    # 生成新增新闻部分
			
 
				-    new_titles_content = ""
			
 
				-    if report_data["new_titles"]:
			
 
				-        new_titles_content += (
			
 
				-            f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
			
 
				-        )
			
 
				-
			
 
				-        for source_data in report_data["new_titles"]:
			
 
				-            new_titles_content += (
			
 
				-                f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n"
			
 
				-            )
			
 
				-
			
 
				-            for j, title_data in enumerate(source_data["titles"], 1):
			
 
				-                title_data_copy = title_data.copy()
			
 
				-                title_data_copy["is_new"] = False
			
 
				-                formatted_title = format_title_for_platform(
			
 
				-                    "feishu", title_data_copy, show_source=False
			
 
				-                )
			
 
				-                new_titles_content += f"  {j}. {formatted_title}\n"
			
 
				-
			
 
				-            new_titles_content += "\n"
			
 
				-
			
 
				-    # 根据配置决定内容顺序
			
 
				-    text_content = ""
			
 
				-    if CONFIG.get("REVERSE_CONTENT_ORDER", False):
			
 
				-        # 新增热点在前，热点词汇统计在后
			
 
				-        if new_titles_content:
			
 
				-            text_content += new_titles_content
			
 
				-            if stats_content:
			
 
				-                text_content += f"\n{CONFIG['FEISHU_MESSAGE_SEPARATOR']}\n\n"
			
 
				-        if stats_content:
			
 
				-            text_content += stats_content
			
 
				-    else:
			
 
				-        # 默认：热点词汇统计在前，新增热点在后
			
 
				-        if stats_content:
			
 
				-            text_content += stats_content
			
 
				-            if new_titles_content:
			
 
				-                text_content += f"\n{CONFIG['FEISHU_MESSAGE_SEPARATOR']}\n\n"
			
 
				-        if new_titles_content:
			
 
				-            text_content += new_titles_content
			
 
				-
			
 
				-    if not text_content:
			
 
				-        if mode == "incremental":
			
 
				-            mode_text = "增量模式下暂无新增匹配的热点词汇"
			
 
				-        elif mode == "current":
			
 
				-            mode_text = "当前榜单模式下暂无匹配的热点词汇"
			
 
				-        else:
			
 
				-            mode_text = "暂无匹配的热点词汇"
			
 
				-        text_content = f"📭 {mode_text}\n\n"
			
 
				-
			
 
				-    if report_data["failed_ids"]:
			
 
				-        if text_content and "暂无匹配" not in text_content:
			
 
				-            text_content += f"\n{CONFIG['FEISHU_MESSAGE_SEPARATOR']}\n\n"
			
 
				-
			
 
				-        text_content += "⚠️ **数据获取失败的平台：**\n\n"
			
 
				-        for i, id_value in enumerate(report_data["failed_ids"], 1):
			
 
				-            text_content += f"  • <font color='red'>{id_value}</font>\n"
			
 
				-
			
 
				-    now = get_beijing_time()
			
 
				-    text_content += (
			
 
				-        f"\n\n<font color='grey'>更新时间：{now.strftime('%Y-%m-%d %H:%M:%S')}</font>"
			
 
				-    )
			
 
				-
			
 
				-    if update_info:
			
 
				-        text_content += f"\n<font color='grey'>TrendRadar 发现新版本 {update_info['remote_version']}，当前 {update_info['current_version']}</font>"
			
 
				-
			
 
				-    return text_content
			
 
				-
			
 
				-
			
 
				-def render_dingtalk_content(
			
 
				-    report_data: Dict, update_info: Optional[Dict] = None, mode: str = "daily"
			
 
				-) -> str:
			
 
				-    """渲染钉钉内容"""
			
 
				-    total_titles = sum(
			
 
				-        len(stat["titles"]) for stat in report_data["stats"] if stat["count"] > 0
			
 
				-    )
			
 
				-    now = get_beijing_time()
			
 
				-
			
 
				-    # 头部信息
			
 
				-    header_content = f"**总新闻数：** {total_titles}\n\n"
			
 
				-    header_content += f"**时间：** {now.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
			
 
				-    header_content += f"**类型：** 热点分析报告\n\n"
			
 
				-    header_content += "---\n\n"
			
 
				-
			
 
				-    # 生成热点词汇统计部分
			
 
				-    stats_content = ""
			
 
				-    if report_data["stats"]:
			
 
				-        stats_content += f"📊 **热点词汇统计**\n\n"
			
 
				-
			
 
				-        total_count = len(report_data["stats"])
			
 
				-
			
 
				-        for i, stat in enumerate(report_data["stats"]):
			
 
				-            word = stat["word"]
			
 
				-            count = stat["count"]
			
 
				-
			
 
				-            sequence_display = f"[{i + 1}/{total_count}]"
			
 
				-
			
 
				-            if count >= 10:
			
 
				-                stats_content += f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
			
 
				-            elif count >= 5:
			
 
				-                stats_content += f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
			
 
				-            else:
			
 
				-                stats_content += f"📌 {sequence_display} **{word}** : {count} 条\n\n"
			
 
				-
			
 
				-            for j, title_data in enumerate(stat["titles"], 1):
			
 
				-                formatted_title = format_title_for_platform(
			
 
				-                    "dingtalk", title_data, show_source=True
			
 
				-                )
			
 
				-                stats_content += f"  {j}. {formatted_title}\n"
			
 
				-
			
 
				-                if j < len(stat["titles"]):
			
 
				-                    stats_content += "\n"
			
 
				-
			
 
				-            if i < len(report_data["stats"]) - 1:
			
 
				-                stats_content += f"\n---\n\n"
			
 
				-
			
 
				-    # 生成新增新闻部分
			
 
				-    new_titles_content = ""
			
 
				-    if report_data["new_titles"]:
			
 
				-        new_titles_content += (
			
 
				-            f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
			
 
				-        )
			
 
				-
			
 
				-        for source_data in report_data["new_titles"]:
			
 
				-            new_titles_content += f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n"
			
 
				-
			
 
				-            for j, title_data in enumerate(source_data["titles"], 1):
			
 
				-                title_data_copy = title_data.copy()
			
 
				-                title_data_copy["is_new"] = False
			
 
				-                formatted_title = format_title_for_platform(
			
 
				-                    "dingtalk", title_data_copy, show_source=False
			
 
				-                )
			
 
				-                new_titles_content += f"  {j}. {formatted_title}\n"
			
 
				-
			
 
				-            new_titles_content += "\n"
			
 
				-
			
 
				-    # 根据配置决定内容顺序
			
 
				-    text_content = header_content
			
 
				-    if CONFIG.get("REVERSE_CONTENT_ORDER", False):
			
 
				-        # 新增热点在前，热点词汇统计在后
			
 
				-        if new_titles_content:
			
 
				-            text_content += new_titles_content
			
 
				-            if stats_content:
			
 
				-                text_content += f"\n---\n\n"
			
 
				-        if stats_content:
			
 
				-            text_content += stats_content
			
 
				-    else:
			
 
				-        # 默认：热点词汇统计在前，新增热点在后
			
 
				-        if stats_content:
			
 
				-            text_content += stats_content
			
 
				-            if new_titles_content:
			
 
				-                text_content += f"\n---\n\n"
			
 
				-        if new_titles_content:
			
 
				-            text_content += new_titles_content
			
 
				-
			
 
				-    if not stats_content and not new_titles_content:
			
 
				-        if mode == "incremental":
			
 
				-            mode_text = "增量模式下暂无新增匹配的热点词汇"
			
 
				-        elif mode == "current":
			
 
				-            mode_text = "当前榜单模式下暂无匹配的热点词汇"
			
 
				-        else:
			
 
				-            mode_text = "暂无匹配的热点词汇"
			
 
				-        text_content += f"📭 {mode_text}\n\n"
			
 
				-
			
 
				-    if report_data["failed_ids"]:
			
 
				-        if "暂无匹配" not in text_content:
			
 
				-            text_content += f"\n---\n\n"
			
 
				-
			
 
				-        text_content += "⚠️ **数据获取失败的平台：**\n\n"
			
 
				-        for i, id_value in enumerate(report_data["failed_ids"], 1):
			
 
				-            text_content += f"  • **{id_value}**\n"
			
 
				-
			
 
				-    text_content += f"\n\n> 更新时间：{now.strftime('%Y-%m-%d %H:%M:%S')}"
			
 
				-
			
 
				-    if update_info:
			
 
				-        text_content += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**，当前 **{update_info['current_version']}**"
			
 
				-
			
 
				-    return text_content
			
 
				-
			
 
				-
			
 
				-def _get_batch_header(format_type: str, batch_num: int, total_batches: int) -> str:
			
 
				-    """根据 format_type 生成对应格式的批次头部"""
			
 
				-    if format_type == "telegram":
			
 
				-        return f"<b>[第 {batch_num}/{total_batches} 批次]</b>\n\n"
			
 
				-    elif format_type == "slack":
			
 
				-        return f"*[第 {batch_num}/{total_batches} 批次]*\n\n"
			
 
				-    elif format_type in ("wework_text", "bark"):
			
 
				-        # 企业微信文本模式和 Bark 使用纯文本格式
			
 
				-        return f"[第 {batch_num}/{total_batches} 批次]\n\n"
			
 
				-    else:
			
 
				-        # 飞书、钉钉、ntfy、企业微信 markdown 模式
			
 
				-        return f"**[第 {batch_num}/{total_batches} 批次]**\n\n"
			
 
				-
			
 
				-
			
 
				-def _get_max_batch_header_size(format_type: str) -> int:
			
 
				-    """估算批次头部的最大字节数（假设最多 99 批次）
			
 
				-
			
 
				-    用于在分批时预留空间，避免事后截断破坏内容完整性。
			
 
				-    """
			
 
				-    # 生成最坏情况的头部（99/99 批次）
			
 
				-    max_header = _get_batch_header(format_type, 99, 99)
			
 
				-    return len(max_header.encode("utf-8"))
			
 
				-
			
 
				-
			
 
				-def _truncate_to_bytes(text: str, max_bytes: int) -> str:
			
 
				-    """安全截断字符串到指定字节数，避免截断多字节字符"""
			
 
				-    text_bytes = text.encode("utf-8")
			
 
				-    if len(text_bytes) <= max_bytes:
			
 
				-        return text
			
 
				-
			
 
				-    # 截断到指定字节数
			
 
				-    truncated = text_bytes[:max_bytes]
			
 
				-
			
 
				-    # 处理可能的不完整 UTF-8 字符
			
 
				-    for i in range(min(4, len(truncated))):
			
 
				-        try:
			
 
				-            return truncated[: len(truncated) - i].decode("utf-8")
			
 
				-        except UnicodeDecodeError:
			
 
				-            continue
			
 
				-
			
 
				-    # 极端情况：返回空字符串
			
 
				-    return ""
			
 
				-
			
 
				-
			
 
				-def add_batch_headers(
			
 
				-    batches: List[str], format_type: str, max_bytes: int
			
 
				-) -> List[str]:
			
 
				-    """为批次添加头部，动态计算确保总大小不超过限制
			
 
				-
			
 
				-    Args:
			
 
				-        batches: 原始批次列表
			
 
				-        format_type: 推送类型（bark, telegram, feishu 等）
			
 
				-        max_bytes: 该推送类型的最大字节限制
			
 
				-
			
 
				-    Returns:
			
 
				-        添加头部后的批次列表
			
 
				-    """
			
 
				-    if len(batches) <= 1:
			
 
				-        return batches
			
 
				-
			
 
				-    total = len(batches)
			
 
				-    result = []
			
 
				-
			
 
				-    for i, content in enumerate(batches, 1):
			
 
				-        # 生成批次头部
			
 
				-        header = _get_batch_header(format_type, i, total)
			
 
				-        header_size = len(header.encode("utf-8"))
			
 
				-
			
 
				-        # 动态计算允许的最大内容大小
			
 
				-        max_content_size = max_bytes - header_size
			
 
				-        content_size = len(content.encode("utf-8"))
			
 
				-
			
 
				-        # 如果超出，截断到安全大小
			
 
				-        if content_size > max_content_size:
			
 
				-            print(
			
 
				-                f"警告：{format_type} 第 {i}/{total} 批次内容({content_size}字节) + 头部({header_size}字节) 超出限制({max_bytes}字节)，截断到 {max_content_size} 字节"
			
 
				-            )
			
 
				-            content = _truncate_to_bytes(content, max_content_size)
			
 
				-
			
 
				-        result.append(header + content)
			
 
				-
			
 
				-    return result
			
 
				-
			
 
				-
			
 
				-def split_content_into_batches(
			
 
				-    report_data: Dict,
			
 
				-    format_type: str,
			
 
				-    update_info: Optional[Dict] = None,
			
 
				-    max_bytes: int = None,
			
 
				-    mode: str = "daily",
			
 
				-) -> List[str]:
			
 
				-    """分批处理消息内容，确保词组标题+至少第一条新闻的完整性"""
			
 
				-    if max_bytes is None:
			
 
				-        if format_type == "dingtalk":
			
 
				-            max_bytes = CONFIG.get("DINGTALK_BATCH_SIZE", 20000)
			
 
				-        elif format_type == "feishu":
			
 
				-            max_bytes = CONFIG.get("FEISHU_BATCH_SIZE", 29000)
			
 
				-        elif format_type == "ntfy":
			
 
				-            max_bytes = 3800
			
 
				-        else:
			
 
				-            max_bytes = CONFIG.get("MESSAGE_BATCH_SIZE", 4000)
			
 
				-
			
 
				-    batches = []
			
 
				-
			
 
				-    total_titles = sum(
			
 
				-        len(stat["titles"]) for stat in report_data["stats"] if stat["count"] > 0
			
 
				-    )
			
 
				-    now = get_beijing_time()
			
 
				-
			
 
				-    base_header = ""
			
 
				-    if format_type in ("wework", "bark"):
			
 
				-        base_header = f"**总新闻数：** {total_titles}\n\n\n\n"
			
 
				-    elif format_type == "telegram":
			
 
				-        base_header = f"总新闻数： {total_titles}\n\n"
			
 
				-    elif format_type == "ntfy":
			
 
				-        base_header = f"**总新闻数：** {total_titles}\n\n"
			
 
				-    elif format_type == "feishu":
			
 
				-        base_header = ""
			
 
				-    elif format_type == "dingtalk":
			
 
				-        base_header = f"**总新闻数：** {total_titles}\n\n"
			
 
				-        base_header += f"**时间：** {now.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
			
 
				-        base_header += f"**类型：** 热点分析报告\n\n"
			
 
				-        base_header += "---\n\n"
			
 
				-    elif format_type == "slack":
			
 
				-        base_header = f"*总新闻数：* {total_titles}\n\n"
			
 
				-
			
 
				-    base_footer = ""
			
 
				-    if format_type in ("wework", "bark"):
			
 
				-        base_footer = f"\n\n\n> 更新时间：{now.strftime('%Y-%m-%d %H:%M:%S')}"
			
 
				-        if update_info:
			
 
				-            base_footer += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**，当前 **{update_info['current_version']}**"
			
 
				-    elif format_type == "telegram":
			
 
				-        base_footer = f"\n\n更新时间：{now.strftime('%Y-%m-%d %H:%M:%S')}"
			
 
				-        if update_info:
			
 
				-            base_footer += f"\nTrendRadar 发现新版本 {update_info['remote_version']}，当前 {update_info['current_version']}"
			
 
				-    elif format_type == "ntfy":
			
 
				-        base_footer = f"\n\n> 更新时间：{now.strftime('%Y-%m-%d %H:%M:%S')}"
			
 
				-        if update_info:
			
 
				-            base_footer += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**，当前 **{update_info['current_version']}**"
			
 
				-    elif format_type == "feishu":
			
 
				-        base_footer = f"\n\n<font color='grey'>更新时间：{now.strftime('%Y-%m-%d %H:%M:%S')}</font>"
			
 
				-        if update_info:
			
 
				-            base_footer += f"\n<font color='grey'>TrendRadar 发现新版本 {update_info['remote_version']}，当前 {update_info['current_version']}</font>"
			
 
				-    elif format_type == "dingtalk":
			
 
				-        base_footer = f"\n\n> 更新时间：{now.strftime('%Y-%m-%d %H:%M:%S')}"
			
 
				-        if update_info:
			
 
				-            base_footer += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**，当前 **{update_info['current_version']}**"
			
 
				-    elif format_type == "slack":
			
 
				-        base_footer = f"\n\n_更新时间：{now.strftime('%Y-%m-%d %H:%M:%S')}_"
			
 
				-        if update_info:
			
 
				-            base_footer += f"\n_TrendRadar 发现新版本 *{update_info['remote_version']}*，当前 *{update_info['current_version']}_"
			
 
				-
			
 
				-    stats_header = ""
			
 
				-    if report_data["stats"]:
			
 
				-        if format_type in ("wework", "bark"):
			
 
				-            stats_header = f"📊 **热点词汇统计**\n\n"
			
 
				-        elif format_type == "telegram":
			
 
				-            stats_header = f"📊 热点词汇统计\n\n"
			
 
				-        elif format_type == "ntfy":
			
 
				-            stats_header = f"📊 **热点词汇统计**\n\n"
			
 
				-        elif format_type == "feishu":
			
 
				-            stats_header = f"📊 **热点词汇统计**\n\n"
			
 
				-        elif format_type == "dingtalk":
			
 
				-            stats_header = f"📊 **热点词汇统计**\n\n"
			
 
				-        elif format_type == "slack":
			
 
				-            stats_header = f"📊 *热点词汇统计*\n\n"
			
 
				-
			
 
				-    current_batch = base_header
			
 
				-    current_batch_has_content = False
			
 
				-
			
 
				-    if (
			
 
				-        not report_data["stats"]
			
 
				-        and not report_data["new_titles"]
			
 
				-        and not report_data["failed_ids"]
			
 
				-    ):
			
 
				-        if mode == "incremental":
			
 
				-            mode_text = "增量模式下暂无新增匹配的热点词汇"
			
 
				-        elif mode == "current":
			
 
				-            mode_text = "当前榜单模式下暂无匹配的热点词汇"
			
 
				-        else:
			
 
				-            mode_text = "暂无匹配的热点词汇"
			
 
				-        simple_content = f"📭 {mode_text}\n\n"
			
 
				-        final_content = base_header + simple_content + base_footer
			
 
				-        batches.append(final_content)
			
 
				-        return batches
			
 
				-
			
 
				-    # 定义处理热点词汇统计的函数
			
 
				-    def process_stats_section(current_batch, current_batch_has_content, batches):
			
 
				-        """处理热点词汇统计"""
			
 
				-        if not report_data["stats"]:
			
 
				-            return current_batch, current_batch_has_content, batches
			
 
				-
			
 
				-        total_count = len(report_data["stats"])
			
 
				-
			
 
				-        # 添加统计标题
			
 
				-        test_content = current_batch + stats_header
			
 
				-        if (
			
 
				-            len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
			
 
				-            < max_bytes
			
 
				-        ):
			
 
				-            current_batch = test_content
			
 
				-            current_batch_has_content = True
			
 
				-        else:
			
 
				-            if current_batch_has_content:
			
 
				-                batches.append(current_batch + base_footer)
			
 
				-            current_batch = base_header + stats_header
			
 
				-            current_batch_has_content = True
			
 
				-
			
 
				-        # 逐个处理词组（确保词组标题+第一条新闻的原子性）
			
 
				-        for i, stat in enumerate(report_data["stats"]):
			
 
				-            word = stat["word"]
			
 
				-            count = stat["count"]
			
 
				-            sequence_display = f"[{i + 1}/{total_count}]"
			
 
				-
			
 
				-            # 构建词组标题
			
 
				-            word_header = ""
			
 
				-            if format_type in ("wework", "bark"):
			
 
				-                if count >= 10:
			
 
				-                    word_header = (
			
 
				-                        f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
			
 
				-                    )
			
 
				-                elif count >= 5:
			
 
				-                    word_header = (
			
 
				-                        f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
			
 
				-                    )
			
 
				-                else:
			
 
				-                    word_header = f"📌 {sequence_display} **{word}** : {count} 条\n\n"
			
 
				-            elif format_type == "telegram":
			
 
				-                if count >= 10:
			
 
				-                    word_header = f"🔥 {sequence_display} {word} : {count} 条\n\n"
			
 
				-                elif count >= 5:
			
 
				-                    word_header = f"📈 {sequence_display} {word} : {count} 条\n\n"
			
 
				-                else:
			
 
				-                    word_header = f"📌 {sequence_display} {word} : {count} 条\n\n"
			
 
				-            elif format_type == "ntfy":
			
 
				-                if count >= 10:
			
 
				-                    word_header = (
			
 
				-                        f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
			
 
				-                    )
			
 
				-                elif count >= 5:
			
 
				-                    word_header = (
			
 
				-                        f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
			
 
				-                    )
			
 
				-                else:
			
 
				-                    word_header = f"📌 {sequence_display} **{word}** : {count} 条\n\n"
			
 
				-            elif format_type == "feishu":
			
 
				-                if count >= 10:
			
 
				-                    word_header = f"🔥 <font color='grey'>{sequence_display}</font> **{word}** : <font color='red'>{count}</font> 条\n\n"
			
 
				-                elif count >= 5:
			
 
				-                    word_header = f"📈 <font color='grey'>{sequence_display}</font> **{word}** : <font color='orange'>{count}</font> 条\n\n"
			
 
				-                else:
			
 
				-                    word_header = f"📌 <font color='grey'>{sequence_display}</font> **{word}** : {count} 条\n\n"
			
 
				-            elif format_type == "dingtalk":
			
 
				-                if count >= 10:
			
 
				-                    word_header = (
			
 
				-                        f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
			
 
				-                    )
			
 
				-                elif count >= 5:
			
 
				-                    word_header = (
			
 
				-                        f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
			
 
				-                    )
			
 
				-                else:
			
 
				-                    word_header = f"📌 {sequence_display} **{word}** : {count} 条\n\n"
			
 
				-            elif format_type == "slack":
			
 
				-                if count >= 10:
			
 
				-                    word_header = (
			
 
				-                        f"🔥 {sequence_display} *{word}* : *{count}* 条\n\n"
			
 
				-                    )
			
 
				-                elif count >= 5:
			
 
				-                    word_header = (
			
 
				-                        f"📈 {sequence_display} *{word}* : *{count}* 条\n\n"
			
 
				-                    )
			
 
				-                else:
			
 
				-                    word_header = f"📌 {sequence_display} *{word}* : {count} 条\n\n"
			
 
				-
			
 
				-            # 构建第一条新闻
			
 
				-            first_news_line = ""
			
 
				-            if stat["titles"]:
			
 
				-                first_title_data = stat["titles"][0]
			
 
				-                if format_type in ("wework", "bark"):
			
 
				-                    formatted_title = format_title_for_platform(
			
 
				-                        "wework", first_title_data, show_source=True
			
 
				-                    )
			
 
				-                elif format_type == "telegram":
			
 
				-                    formatted_title = format_title_for_platform(
			
 
				-                        "telegram", first_title_data, show_source=True
			
 
				-                    )
			
 
				-                elif format_type == "ntfy":
			
 
				-                    formatted_title = format_title_for_platform(
			
 
				-                        "ntfy", first_title_data, show_source=True
			
 
				-                    )
			
 
				-                elif format_type == "feishu":
			
 
				-                    formatted_title = format_title_for_platform(
			
 
				-                        "feishu", first_title_data, show_source=True
			
 
				-                    )
			
 
				-                elif format_type == "dingtalk":
			
 
				-                    formatted_title = format_title_for_platform(
			
 
				-                        "dingtalk", first_title_data, show_source=True
			
 
				-                    )
			
 
				-                elif format_type == "slack":
			
 
				-                    formatted_title = format_title_for_platform(
			
 
				-                        "slack", first_title_data, show_source=True
			
 
				-                    )
			
 
				-                else:
			
 
				-                    formatted_title = f"{first_title_data['title']}"
			
 
				-
			
 
				-                first_news_line = f"  1. {formatted_title}\n"
			
 
				-                if len(stat["titles"]) > 1:
			
 
				-                    first_news_line += "\n"
			
 
				-
			
 
				-            # 原子性检查：词组标题+第一条新闻必须一起处理
			
 
				-            word_with_first_news = word_header + first_news_line
			
 
				-            test_content = current_batch + word_with_first_news
			
 
				-
			
 
				-            if (
			
 
				-                len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
			
 
				-                >= max_bytes
			
 
				-            ):
			
 
				-                # 当前批次容纳不下，开启新批次
			
 
				-                if current_batch_has_content:
			
 
				-                    batches.append(current_batch + base_footer)
			
 
				-                current_batch = base_header + stats_header + word_with_first_news
			
 
				-                current_batch_has_content = True
			
 
				-                start_index = 1
			
 
				-            else:
			
 
				-                current_batch = test_content
			
 
				-                current_batch_has_content = True
			
 
				-                start_index = 1
			
 
				-
			
 
				-            # 处理剩余新闻条目
			
 
				-            for j in range(start_index, len(stat["titles"])):
			
 
				-                title_data = stat["titles"][j]
			
 
				-                if format_type in ("wework", "bark"):
			
 
				-                    formatted_title = format_title_for_platform(
			
 
				-                        "wework", title_data, show_source=True
			
 
				-                    )
			
 
				-                elif format_type == "telegram":
			
 
				-                    formatted_title = format_title_for_platform(
			
 
				-                        "telegram", title_data, show_source=True
			
 
				-                    )
			
 
				-                elif format_type == "ntfy":
			
 
				-                    formatted_title = format_title_for_platform(
			
 
				-                        "ntfy", title_data, show_source=True
			
 
				-                    )
			
 
				-                elif format_type == "feishu":
			
 
				-                    formatted_title = format_title_for_platform(
			
 
				-                        "feishu", title_data, show_source=True
			
 
				-                    )
			
 
				-                elif format_type == "dingtalk":
			
 
				-                    formatted_title = format_title_for_platform(
			
 
				-                        "dingtalk", title_data, show_source=True
			
 
				-                    )
			
 
				-                elif format_type == "slack":
			
 
				-                    formatted_title = format_title_for_platform(
			
 
				-                        "slack", title_data, show_source=True
			
 
				-                    )
			
 
				-                else:
			
 
				-                    formatted_title = f"{title_data['title']}"
			
 
				-
			
 
				-                news_line = f"  {j + 1}. {formatted_title}\n"
			
 
				-                if j < len(stat["titles"]) - 1:
			
 
				-                    news_line += "\n"
			
 
				-
			
 
				-                test_content = current_batch + news_line
			
 
				-                if (
			
 
				-                    len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
			
 
				-                    >= max_bytes
			
 
				-                ):
			
 
				-                    if current_batch_has_content:
			
 
				-                        batches.append(current_batch + base_footer)
			
 
				-                    current_batch = base_header + stats_header + word_header + news_line
			
 
				-                    current_batch_has_content = True
			
 
				-                else:
			
 
				-                    current_batch = test_content
			
 
				-                    current_batch_has_content = True
			
 
				-
			
 
				-            # 词组间分隔符
			
 
				-            if i < len(report_data["stats"]) - 1:
			
 
				-                separator = ""
			
 
				-                if format_type in ("wework", "bark"):
			
 
				-                    separator = f"\n\n\n\n"
			
 
				-                elif format_type == "telegram":
			
 
				-                    separator = f"\n\n"
			
 
				-                elif format_type == "ntfy":
			
 
				-                    separator = f"\n\n"
			
 
				-                elif format_type == "feishu":
			
 
				-                    separator = f"\n{CONFIG['FEISHU_MESSAGE_SEPARATOR']}\n\n"
			
 
				-                elif format_type == "dingtalk":
			
 
				-                    separator = f"\n---\n\n"
			
 
				-                elif format_type == "slack":
			
 
				-                    separator = f"\n\n"
			
 
				-
			
 
				-                test_content = current_batch + separator
			
 
				-                if (
			
 
				-                    len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
			
 
				-                    < max_bytes
			
 
				-                ):
			
 
				-                    current_batch = test_content
			
 
				-
			
 
				-        return current_batch, current_batch_has_content, batches
			
 
				-
			
 
				-    # 定义处理新增新闻的函数
			
 
    def process_new_titles_section(current_batch, current_batch_has_content, batches):
        """Append the "newly added hot news" section to the batch stream.

        This is a closure: it reads ``report_data``, ``format_type``,
        ``base_header``, ``base_footer``, ``max_bytes`` and ``CONFIG`` from the
        enclosing scope.

        Args:
            current_batch: Text of the batch currently being assembled.
            current_batch_has_content: Whether ``current_batch`` should be
                flushed when it overflows.
            batches: List of finished batches; flushed batches are appended to
                it in place.

        Returns:
            The updated ``(current_batch, current_batch_has_content, batches)``
            triple.
        """
        if not report_data["new_titles"]:
            return current_batch, current_batch_has_content, batches

        def _fits(candidate):
            """Return True while ``candidate`` plus the footer stays under ``max_bytes``."""
            return (
                len(candidate.encode("utf-8")) + len(base_footer.encode("utf-8"))
                < max_bytes
            )

        def _format_new_title(title_data):
            """Render one title for the current platform.

            Used for the first title and for every following one, so all items
            of a source share the same formatting. Previously the follow-up
            loop skipped "bark", which left bark messages with a formatted
            first item and raw titles afterwards.
            """
            entry = title_data.copy()
            # The whole section is already labelled as "new"; the flag is
            # cleared on the copy (presumably to suppress a per-item marker —
            # confirm against format_title_for_platform).
            entry["is_new"] = False
            if format_type in ("wework", "bark"):
                # bark reuses the wework rendering
                return format_title_for_platform("wework", entry, show_source=False)
            if format_type in ("telegram", "feishu", "dingtalk", "slack"):
                return format_title_for_platform(format_type, entry, show_source=False)
            return f"{entry['title']}"

        # Section header, per platform.
        new_header = ""
        if format_type in ("wework", "bark"):
            new_header = f"\n\n\n\n🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
        elif format_type == "telegram":
            new_header = (
                f"\n\n🆕 本次新增热点新闻 (共 {report_data['total_new_count']} 条)\n\n"
            )
        elif format_type == "ntfy":
            new_header = f"\n\n🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
        elif format_type == "feishu":
            new_header = f"\n{CONFIG['FEISHU_MESSAGE_SEPARATOR']}\n\n🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
        elif format_type == "dingtalk":
            new_header = f"\n---\n\n🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
        elif format_type == "slack":
            new_header = f"\n\n🆕 *本次新增热点新闻* (共 {report_data['total_new_count']} 条)\n\n"

        candidate = current_batch + new_header
        if _fits(candidate):
            current_batch = candidate
        else:
            if current_batch_has_content:
                batches.append(current_batch + base_footer)
            current_batch = base_header + new_header
        current_batch_has_content = True

        # Walk the sources one by one.
        for source_data in report_data["new_titles"]:
            titles = source_data["titles"]

            source_header = ""
            if format_type in ("wework", "bark", "ntfy", "feishu", "dingtalk"):
                source_header = f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n"
            elif format_type == "telegram":
                source_header = f"{source_data['source_name']} ({len(source_data['titles'])} 条):\n\n"
            elif format_type == "slack":
                source_header = f"*{source_data['source_name']}* ({len(source_data['titles'])} 条):\n\n"

            first_news_line = ""
            if titles:
                first_news_line = f"  1. {_format_new_title(titles[0])}\n"

            # Atomic check: a source header must never be separated from its
            # first item, so both are placed (or moved to a new batch) together.
            source_with_first_news = source_header + first_news_line
            candidate = current_batch + source_with_first_news
            if _fits(candidate):
                current_batch = candidate
            else:
                if current_batch_has_content:
                    batches.append(current_batch + base_footer)
                current_batch = base_header + new_header + source_with_first_news
            current_batch_has_content = True

            # Remaining items; index 0 was emitted together with the header.
            for j in range(1, len(titles)):
                news_line = f"  {j + 1}. {_format_new_title(titles[j])}\n"

                candidate = current_batch + news_line
                if _fits(candidate):
                    current_batch = candidate
                else:
                    if current_batch_has_content:
                        batches.append(current_batch + base_footer)
                    # Repeat both headers so the continuation batch is
                    # self-explanatory.
                    current_batch = base_header + new_header + source_header + news_line
                current_batch_has_content = True

            current_batch += "\n"

        return current_batch, current_batch_has_content, batches
			
 
				-
			
 
				-    # 根据配置决定处理顺序
			
 
				-    if CONFIG.get("REVERSE_CONTENT_ORDER", False):
			
 
				-        # 新增热点在前，热点词汇统计在后
			
 
				-        current_batch, current_batch_has_content, batches = process_new_titles_section(
			
 
				-            current_batch, current_batch_has_content, batches
			
 
				-        )
			
 
				-        current_batch, current_batch_has_content, batches = process_stats_section(
			
 
				-            current_batch, current_batch_has_content, batches
			
 
				-        )
			
 
				-    else:
			
 
				-        # 默认：热点词汇统计在前，新增热点在后
			
 
				-        current_batch, current_batch_has_content, batches = process_stats_section(
			
 
				-            current_batch, current_batch_has_content, batches
			
 
				-        )
			
 
				-        current_batch, current_batch_has_content, batches = process_new_titles_section(
			
 
				-            current_batch, current_batch_has_content, batches
			
 
				-        )
			
 
				-
			
 
				-    if report_data["failed_ids"]:
			
 
				-        failed_header = ""
			
 
				-        if format_type == "wework":
			
 
				-            failed_header = f"\n\n\n\n⚠️ **数据获取失败的平台：**\n\n"
			
 
				-        elif format_type == "telegram":
			
 
				-            failed_header = f"\n\n⚠️ 数据获取失败的平台：\n\n"
			
 
				-        elif format_type == "ntfy":
			
 
				-            failed_header = f"\n\n⚠️ **数据获取失败的平台：**\n\n"
			
 
				-        elif format_type == "feishu":
			
 
				-            failed_header = f"\n{CONFIG['FEISHU_MESSAGE_SEPARATOR']}\n\n⚠️ **数据获取失败的平台：**\n\n"
			
 
				-        elif format_type == "dingtalk":
			
 
				-            failed_header = f"\n---\n\n⚠️ **数据获取失败的平台：**\n\n"
			
 
				-
			
 
				-        test_content = current_batch + failed_header
			
 
				-        if (
			
 
				-            len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
			
 
				-            >= max_bytes
			
 
				-        ):
			
 
				-            if current_batch_has_content:
			
 
				-                batches.append(current_batch + base_footer)
			
 
				-            current_batch = base_header + failed_header
			
 
				-            current_batch_has_content = True
			
 
				-        else:
			
 
				-            current_batch = test_content
			
 
				-            current_batch_has_content = True
			
 
				-
			
 
				-        for i, id_value in enumerate(report_data["failed_ids"], 1):
			
 
				-            if format_type == "feishu":
			
 
				-                failed_line = f"  • <font color='red'>{id_value}</font>\n"
			
 
				-            elif format_type == "dingtalk":
			
 
				-                failed_line = f"  • **{id_value}**\n"
			
 
				-            else:
			
 
				-                failed_line = f"  • {id_value}\n"
			
 
				-
			
 
				-            test_content = current_batch + failed_line
			
 
				-            if (
			
 
				-                len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
			
 
				-                >= max_bytes
			
 
				-            ):
			
 
				-                if current_batch_has_content:
			
 
				-                    batches.append(current_batch + base_footer)
			
 
				-                current_batch = base_header + failed_header + failed_line
			
 
				-                current_batch_has_content = True
			
 
				-            else:
			
 
				-                current_batch = test_content
			
 
				-                current_batch_has_content = True
			
 
				-
			
 
				-    # 完成最后批次
			
 
				-    if current_batch_has_content:
			
 
				-        batches.append(current_batch + base_footer)
			
 
				-
			
 
				-    return batches
			
 
				-
			
 
				-
			
 
				-def send_to_notifications(
			
 
				-    stats: List[Dict],
			
 
				-    failed_ids: Optional[List] = None,
			
 
				-    report_type: str = "当日汇总",
			
 
				-    new_titles: Optional[Dict] = None,
			
 
				-    id_to_name: Optional[Dict] = None,
			
 
				-    update_info: Optional[Dict] = None,
			
 
				-    proxy_url: Optional[str] = None,
			
 
				-    mode: str = "daily",
			
 
				-    html_file_path: Optional[str] = None,
			
 
				-) -> Dict[str, bool]:
			
 
				-    """发送数据到多个通知平台（支持多账号）"""
			
 
				-    results = {}
			
 
				-    max_accounts = CONFIG["MAX_ACCOUNTS_PER_CHANNEL"]
			
 
				-
			
 
				-    if CONFIG["PUSH_WINDOW"]["ENABLED"]:
			
 
				-        push_manager = PushRecordManager()
			
 
				-        time_range_start = CONFIG["PUSH_WINDOW"]["TIME_RANGE"]["START"]
			
 
				-        time_range_end = CONFIG["PUSH_WINDOW"]["TIME_RANGE"]["END"]
			
 
				-
			
 
				-        if not push_manager.is_in_time_range(time_range_start, time_range_end):
			
 
				-            now = get_beijing_time()
			
 
				-            print(
			
 
				-                f"推送窗口控制：当前时间 {now.strftime('%H:%M')} 不在推送时间窗口 {time_range_start}-{time_range_end} 内，跳过推送"
			
 
				-            )
			
 
				-            return results
			
 
				-
			
 
				-        if CONFIG["PUSH_WINDOW"]["ONCE_PER_DAY"]:
			
 
				-            if push_manager.has_pushed_today():
			
 
				-                print(f"推送窗口控制：今天已推送过，跳过本次推送")
			
 
				-                return results
			
 
				-            else:
			
 
				-                print(f"推送窗口控制：今天首次推送")
			
 
				-
			
 
				-    report_data = prepare_report_data(stats, failed_ids, new_titles, id_to_name, mode)
			
 
				-
			
 
				-    update_info_to_send = update_info if CONFIG["SHOW_VERSION_UPDATE"] else None
			
 
				-
			
 
				-    # 发送到飞书（多账号）
			
 
				-    feishu_urls = parse_multi_account_config(CONFIG["FEISHU_WEBHOOK_URL"])
			
 
				-    if feishu_urls:
			
 
				-        feishu_urls = limit_accounts(feishu_urls, max_accounts, "飞书")
			
 
				-        feishu_results = []
			
 
				-        for i, url in enumerate(feishu_urls):
			
 
				-            if url:  # 跳过空值
			
 
				-                account_label = f"账号{i+1}" if len(feishu_urls) > 1 else ""
			
 
				-                result = send_to_feishu(
			
 
				-                    url, report_data, report_type, update_info_to_send, proxy_url, mode, account_label
			
 
				-                )
			
 
				-                feishu_results.append(result)
			
 
				-        results["feishu"] = any(feishu_results) if feishu_results else False
			
 
				-
			
 
				-    # 发送到钉钉（多账号）
			
 
				-    dingtalk_urls = parse_multi_account_config(CONFIG["DINGTALK_WEBHOOK_URL"])
			
 
				-    if dingtalk_urls:
			
 
				-        dingtalk_urls = limit_accounts(dingtalk_urls, max_accounts, "钉钉")
			
 
				-        dingtalk_results = []
			
 
				-        for i, url in enumerate(dingtalk_urls):
			
 
				-            if url:
			
 
				-                account_label = f"账号{i+1}" if len(dingtalk_urls) > 1 else ""
			
 
				-                result = send_to_dingtalk(
			
 
				-                    url, report_data, report_type, update_info_to_send, proxy_url, mode, account_label
			
 
				-                )
			
 
				-                dingtalk_results.append(result)
			
 
				-        results["dingtalk"] = any(dingtalk_results) if dingtalk_results else False
			
 
				-
			
 
				-    # 发送到企业微信（多账号）
			
 
				-    wework_urls = parse_multi_account_config(CONFIG["WEWORK_WEBHOOK_URL"])
			
 
				-    if wework_urls:
			
 
				-        wework_urls = limit_accounts(wework_urls, max_accounts, "企业微信")
			
 
				-        wework_results = []
			
 
				-        for i, url in enumerate(wework_urls):
			
 
				-            if url:
			
 
				-                account_label = f"账号{i+1}" if len(wework_urls) > 1 else ""
			
 
				-                result = send_to_wework(
			
 
				-                    url, report_data, report_type, update_info_to_send, proxy_url, mode, account_label
			
 
				-                )
			
 
				-                wework_results.append(result)
			
 
				-        results["wework"] = any(wework_results) if wework_results else False
			
 
				-
			
 
				-    # 发送到 Telegram（多账号，需验证配对）
			
 
				-    telegram_tokens = parse_multi_account_config(CONFIG["TELEGRAM_BOT_TOKEN"])
			
 
				-    telegram_chat_ids = parse_multi_account_config(CONFIG["TELEGRAM_CHAT_ID"])
			
 
				-    if telegram_tokens and telegram_chat_ids:
			
 
				-        valid, count = validate_paired_configs(
			
 
				-            {"bot_token": telegram_tokens, "chat_id": telegram_chat_ids},
			
 
				-            "Telegram",
			
 
				-            required_keys=["bot_token", "chat_id"]
			
 
				-        )
			
 
				-        if valid and count > 0:
			
 
				-            telegram_tokens = limit_accounts(telegram_tokens, max_accounts, "Telegram")
			
 
				-            telegram_chat_ids = telegram_chat_ids[:len(telegram_tokens)]  # 保持数量一致
			
 
				-            telegram_results = []
			
 
				-            for i in range(len(telegram_tokens)):
			
 
				-                token = telegram_tokens[i]
			
 
				-                chat_id = telegram_chat_ids[i]
			
 
				-                if token and chat_id:
			
 
				-                    account_label = f"账号{i+1}" if len(telegram_tokens) > 1 else ""
			
 
				-                    result = send_to_telegram(
			
 
				-                        token, chat_id, report_data, report_type,
			
 
				-                        update_info_to_send, proxy_url, mode, account_label
			
 
				-                    )
			
 
				-                    telegram_results.append(result)
			
 
				-            results["telegram"] = any(telegram_results) if telegram_results else False
			
 
				-
			
 
				-    # 发送到 ntfy（多账号，需验证配对）
			
 
				-    ntfy_server_url = CONFIG["NTFY_SERVER_URL"]
			
 
				-    ntfy_topics = parse_multi_account_config(CONFIG["NTFY_TOPIC"])
			
 
				-    ntfy_tokens = parse_multi_account_config(CONFIG["NTFY_TOKEN"])
			
 
				-    if ntfy_server_url and ntfy_topics:
			
 
				-        # 验证 token 和 topic 数量一致（如果配置了 token）
			
 
				-        if ntfy_tokens and len(ntfy_tokens) != len(ntfy_topics):
			
 
				-            print(f"❌ ntfy 配置错误：topic 数量({len(ntfy_topics)})与 token 数量({len(ntfy_tokens)})不一致，跳过 ntfy 推送")
			
 
				-        else:
			
 
				-            ntfy_topics = limit_accounts(ntfy_topics, max_accounts, "ntfy")
			
 
				-            if ntfy_tokens:
			
 
				-                ntfy_tokens = ntfy_tokens[:len(ntfy_topics)]
			
 
				-            ntfy_results = []
			
 
				-            for i, topic in enumerate(ntfy_topics):
			
 
				-                if topic:
			
 
				-                    token = get_account_at_index(ntfy_tokens, i, "") if ntfy_tokens else ""
			
 
				-                    account_label = f"账号{i+1}" if len(ntfy_topics) > 1 else ""
			
 
				-                    result = send_to_ntfy(
			
 
				-                        ntfy_server_url, topic, token, report_data, report_type,
			
 
				-                        update_info_to_send, proxy_url, mode, account_label
			
 
				-                    )
			
 
				-                    ntfy_results.append(result)
			
 
				-            results["ntfy"] = any(ntfy_results) if ntfy_results else False
			
 
				-
			
 
				-    # 发送到 Bark（多账号）
			
 
				-    bark_urls = parse_multi_account_config(CONFIG["BARK_URL"])
			
 
				-    if bark_urls:
			
 
				-        bark_urls = limit_accounts(bark_urls, max_accounts, "Bark")
			
 
				-        bark_results = []
			
 
				-        for i, url in enumerate(bark_urls):
			
 
				-            if url:
			
 
				-                account_label = f"账号{i+1}" if len(bark_urls) > 1 else ""
			
 
				-                result = send_to_bark(
			
 
				-                    url, report_data, report_type, update_info_to_send, proxy_url, mode, account_label
			
 
				-                )
			
 
				-                bark_results.append(result)
			
 
				-        results["bark"] = any(bark_results) if bark_results else False
			
 
				-
			
 
				-    # 发送到 Slack（多账号）
			
 
				-    slack_urls = parse_multi_account_config(CONFIG["SLACK_WEBHOOK_URL"])
			
 
				-    if slack_urls:
			
 
				-        slack_urls = limit_accounts(slack_urls, max_accounts, "Slack")
			
 
				-        slack_results = []
			
 
				-        for i, url in enumerate(slack_urls):
			
 
				-            if url:
			
 
				-                account_label = f"账号{i+1}" if len(slack_urls) > 1 else ""
			
 
				-                result = send_to_slack(
			
 
				-                    url, report_data, report_type, update_info_to_send, proxy_url, mode, account_label
			
 
				-                )
			
 
				-                slack_results.append(result)
			
 
				-        results["slack"] = any(slack_results) if slack_results else False
			
 
				-
			
 
				-    # 发送邮件（保持原有逻辑，已支持多收件人）
			
 
				-    email_from = CONFIG["EMAIL_FROM"]
			
 
				-    email_password = CONFIG["EMAIL_PASSWORD"]
			
 
				-    email_to = CONFIG["EMAIL_TO"]
			
 
				-    email_smtp_server = CONFIG.get("EMAIL_SMTP_SERVER", "")
			
 
				-    email_smtp_port = CONFIG.get("EMAIL_SMTP_PORT", "")
			
 
				-    if email_from and email_password and email_to:
			
 
				-        results["email"] = send_to_email(
			
 
				-            email_from,
			
 
				-            email_password,
			
 
				-            email_to,
			
 
				-            report_type,
			
 
				-            html_file_path,
			
 
				-            email_smtp_server,
			
 
				-            email_smtp_port,
			
 
				-        )
			
 
				-
			
 
				-    if not results:
			
 
				-        print("未配置任何通知渠道，跳过通知发送")
			
 
				-
			
 
				-    # 如果成功发送了任何通知，且启用了每天只推一次，则记录推送
			
 
				-    if (
			
 
				-        CONFIG["PUSH_WINDOW"]["ENABLED"]
			
 
				-        and CONFIG["PUSH_WINDOW"]["ONCE_PER_DAY"]
			
 
				-        and any(results.values())
			
 
				-    ):
			
 
				-        push_manager = PushRecordManager()
			
 
				-        push_manager.record_push(report_type)
			
 
				-
			
 
				-    return results
			
 
				-
			
 
				-
			
 
				-def send_to_feishu(
			
 
				-    webhook_url: str,
			
 
				-    report_data: Dict,
			
 
				-    report_type: str,
			
 
				-    update_info: Optional[Dict] = None,
			
 
				-    proxy_url: Optional[str] = None,
			
 
				-    mode: str = "daily",
			
 
				-    account_label: str = "",
			
 
				-) -> bool:
			
 
				-    """发送到飞书（支持分批发送）"""
			
 
				-    headers = {"Content-Type": "application/json"}
			
 
				-    proxies = None
			
 
				-    if proxy_url:
			
 
				-        proxies = {"http": proxy_url, "https": proxy_url}
			
 
				-
			
 
				-    # 日志前缀
			
 
				-    log_prefix = f"飞书{account_label}" if account_label else "飞书"
			
 
				-
			
 
				-    # 获取分批内容，使用飞书专用的批次大小
			
 
				-    feishu_batch_size = CONFIG.get("FEISHU_BATCH_SIZE", 29000)
			
 
				-    # 预留批次头部空间，避免添加头部后超限
			
 
				-    header_reserve = _get_max_batch_header_size("feishu")
			
 
				-    batches = split_content_into_batches(
			
 
				-        report_data,
			
 
				-        "feishu",
			
 
				-        update_info,
			
 
				-        max_bytes=feishu_batch_size - header_reserve,
			
 
				-        mode=mode,
			
 
				-    )
			
 
				-
			
 
				-    # 统一添加批次头部（已预留空间，不会超限）
			
 
				-    batches = add_batch_headers(batches, "feishu", feishu_batch_size)
			
 
				-
			
 
				-    print(f"{log_prefix}消息分为 {len(batches)} 批次发送 [{report_type}]")
			
 
				-
			
 
				-    # 逐批发送
			
 
				-    for i, batch_content in enumerate(batches, 1):
			
 
				-        batch_size = len(batch_content.encode("utf-8"))
			
 
				-        print(
			
 
				-            f"发送{log_prefix}第 {i}/{len(batches)} 批次，大小：{batch_size} 字节 [{report_type}]"
			
 
				-        )
			
 
				-
			
 
				-        total_titles = sum(
			
 
				-            len(stat["titles"]) for stat in report_data["stats"] if stat["count"] > 0
			
 
				-        )
			
 
				-        now = get_beijing_time()
			
 
				-
			
 
				-        payload = {
			
 
				-            "msg_type": "text",
			
 
				-            "content": {
			
 
				-                "total_titles": total_titles,
			
 
				-                "timestamp": now.strftime("%Y-%m-%d %H:%M:%S"),
			
 
				-                "report_type": report_type,
			
 
				-                "text": batch_content,
			
 
				-            },
			
 
				-        }
			
 
				-
			
 
				-        try:
			
 
				-            response = requests.post(
			
 
				-                webhook_url, headers=headers, json=payload, proxies=proxies, timeout=30
			
 
				-            )
			
 
				-            if response.status_code == 200:
			
 
				-                result = response.json()
			
 
				-                # 检查飞书的响应状态
			
 
				-                if result.get("StatusCode") == 0 or result.get("code") == 0:
			
 
				-                    print(f"{log_prefix}第 {i}/{len(batches)} 批次发送成功 [{report_type}]")
			
 
				-                    # 批次间间隔
			
 
				-                    if i < len(batches):
			
 
				-                        time.sleep(CONFIG["BATCH_SEND_INTERVAL"])
			
 
				-                else:
			
 
				-                    error_msg = result.get("msg") or result.get("StatusMessage", "未知错误")
			
 
				-                    print(
			
 
				-                        f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}]，错误：{error_msg}"
			
 
				-                    )
			
 
				-                    return False
			
 
				-            else:
			
 
				-                print(
			
 
				-                    f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}]，状态码：{response.status_code}"
			
 
				-                )
			
 
				-                return False
			
 
				-        except Exception as e:
			
 
				-            print(f"{log_prefix}第 {i}/{len(batches)} 批次发送出错 [{report_type}]：{e}")
			
 
				-            return False
			
 
				-
			
 
				-    print(f"{log_prefix}所有 {len(batches)} 批次发送完成 [{report_type}]")
			
 
				-    return True
			
 
				-
			
 
				-
			
 
				-def send_to_dingtalk(
			
 
				-    webhook_url: str,
			
 
				-    report_data: Dict,
			
 
				-    report_type: str,
			
 
				-    update_info: Optional[Dict] = None,
			
 
				-    proxy_url: Optional[str] = None,
			
 
				-    mode: str = "daily",
			
 
				-    account_label: str = "",
			
 
				-) -> bool:
			
 
				-    """发送到钉钉（支持分批发送）"""
			
 
				-    headers = {"Content-Type": "application/json"}
			
 
				-    proxies = None
			
 
				-    if proxy_url:
			
 
				-        proxies = {"http": proxy_url, "https": proxy_url}
			
 
				-
			
 
				-    # 日志前缀
			
 
				-    log_prefix = f"钉钉{account_label}" if account_label else "钉钉"
			
 
				-
			
 
				-    # 获取分批内容，使用钉钉专用的批次大小
			
 
				-    dingtalk_batch_size = CONFIG.get("DINGTALK_BATCH_SIZE", 20000)
			
 
				-    # 预留批次头部空间，避免添加头部后超限
			
 
				-    header_reserve = _get_max_batch_header_size("dingtalk")
			
 
				-    batches = split_content_into_batches(
			
 
				-        report_data,
			
 
				-        "dingtalk",
			
 
				-        update_info,
			
 
				-        max_bytes=dingtalk_batch_size - header_reserve,
			
 
				-        mode=mode,
			
 
				-    )
			
 
				-
			
 
				-    # 统一添加批次头部（已预留空间，不会超限）
			
 
				-    batches = add_batch_headers(batches, "dingtalk", dingtalk_batch_size)
			
 
				-
			
 
				-    print(f"{log_prefix}消息分为 {len(batches)} 批次发送 [{report_type}]")
			
 
				-
			
 
				-    # 逐批发送
			
 
				-    for i, batch_content in enumerate(batches, 1):
			
 
				-        batch_size = len(batch_content.encode("utf-8"))
			
 
				-        print(
			
 
				-            f"发送{log_prefix}第 {i}/{len(batches)} 批次，大小：{batch_size} 字节 [{report_type}]"
			
 
				-        )
			
 
				-
			
 
				-        payload = {
			
 
				-            "msgtype": "markdown",
			
 
				-            "markdown": {
			
 
				-                "title": f"TrendRadar 热点分析报告 - {report_type}",
			
 
				-                "text": batch_content,
			
 
				-            },
			
 
				-        }
			
 
				-
			
 
				-        try:
			
 
				-            response = requests.post(
			
 
				-                webhook_url, headers=headers, json=payload, proxies=proxies, timeout=30
			
 
				-            )
			
 
				-            if response.status_code == 200:
			
 
				-                result = response.json()
			
 
				-                if result.get("errcode") == 0:
			
 
				-                    print(f"{log_prefix}第 {i}/{len(batches)} 批次发送成功 [{report_type}]")
			
 
				-                    # 批次间间隔
			
 
				-                    if i < len(batches):
			
 
				-                        time.sleep(CONFIG["BATCH_SEND_INTERVAL"])
			
 
				-                else:
			
 
				-                    print(
			
 
				-                        f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}]，错误：{result.get('errmsg')}"
			
 
				-                    )
			
 
				-                    return False
			
 
				-            else:
			
 
				-                print(
			
 
				-                    f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}]，状态码：{response.status_code}"
			
 
				-                )
			
 
				-                return False
			
 
				-        except Exception as e:
			
 
				-            print(f"{log_prefix}第 {i}/{len(batches)} 批次发送出错 [{report_type}]：{e}")
			
 
				-            return False
			
 
				-
			
 
				-    print(f"{log_prefix}所有 {len(batches)} 批次发送完成 [{report_type}]")
			
 
				-    return True
			
 
				-
			
 
				-
			
 
				-def strip_markdown(text: str) -> str:
			
 
				-    """去除文本中的 markdown 语法格式，用于个人微信推送"""
			
 
				-
			
 
				-    # 去除粗体 **text** 或 __text__
			
 
				-    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
			
 
				-    text = re.sub(r'__(.+?)__', r'\1', text)
			
 
				-
			
 
				-    # 去除斜体 *text* 或 _text_
			
 
				-    text = re.sub(r'\*(.+?)\*', r'\1', text)
			
 
				-    text = re.sub(r'_(.+?)_', r'\1', text)
			
 
				-
			
 
				-    # 去除删除线 ~~text~~
			
 
				-    text = re.sub(r'~~(.+?)~~', r'\1', text)
			
 
				-
			
 
				-    # 转换链接 [text](url) -> text url（保留 URL）
			
 
				-    text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'\1 \2', text)
			
 
				-    # 如果不需要保留 URL，可以使用下面这行（只保留标题文本）：
			
 
				-    # text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
			
 
				-
			
 
				-    # 去除图片 ![alt](url) -> alt
			
 
				-    text = re.sub(r'!\[(.+?)\]\(.+?\)', r'\1', text)
			
 
				-
			
 
				-    # 去除行内代码 `code`
			
 
				-    text = re.sub(r'`(.+?)`', r'\1', text)
			
 
				-
			
 
				-    # 去除引用符号 >
			
 
				-    text = re.sub(r'^>\s*', '', text, flags=re.MULTILINE)
			
 
				-
			
 
				-    # 去除标题符号 # ## ### 等
			
 
				-    text = re.sub(r'^#+\s*', '', text, flags=re.MULTILINE)
			
 
				-
			
 
				-    # 去除水平分割线 --- 或 ***
			
 
				-    text = re.sub(r'^[\-\*]{3,}\s*$', '', text, flags=re.MULTILINE)
			
 
				-
			
 
				-    # 去除 HTML 标签 <font color='xxx'>text</font> -> text
			
 
				-    text = re.sub(r'<font[^>]*>(.+?)</font>', r'\1', text)
			
 
				-    text = re.sub(r'<[^>]+>', '', text)
			
 
				-
			
 
				-    # 清理多余的空行（保留最多两个连续空行）
			
 
				-    text = re.sub(r'\n{3,}', '\n\n', text)
			
 
				-
			
 
				-    return text.strip()
			
 
				-
			
 
				-
			
 
				-def send_to_wework(
			
 
				-    webhook_url: str,
			
 
				-    report_data: Dict,
			
 
				-    report_type: str,
			
 
				-    update_info: Optional[Dict] = None,
			
 
				-    proxy_url: Optional[str] = None,
			
 
				-    mode: str = "daily",
			
 
				-    account_label: str = "",
			
 
				-) -> bool:
			
 
				-    """发送到企业微信（支持分批发送，支持 markdown 和 text 两种格式）"""
			
 
				-    headers = {"Content-Type": "application/json"}
			
 
				-    proxies = None
			
 
				-    if proxy_url:
			
 
				-        proxies = {"http": proxy_url, "https": proxy_url}
			
 
				-
			
 
				-    # 日志前缀
			
 
				-    log_prefix = f"企业微信{account_label}" if account_label else "企业微信"
			
 
				-
			
 
				-    # 获取消息类型配置（markdown 或 text）
			
 
				-    msg_type = CONFIG.get("WEWORK_MSG_TYPE", "markdown").lower()
			
 
				-    is_text_mode = msg_type == "text"
			
 
				-
			
 
				-    if is_text_mode:
			
 
				-        print(f"{log_prefix}使用 text 格式（个人微信模式）[{report_type}]")
			
 
				-    else:
			
 
				-        print(f"{log_prefix}使用 markdown 格式（群机器人模式）[{report_type}]")
			
 
				-
			
 
				-    # text 模式使用 wework_text，markdown 模式使用 wework
			
 
				-    header_format_type = "wework_text" if is_text_mode else "wework"
			
 
				-
			
 
				-    # 获取分批内容，预留批次头部空间
			
 
				-    wework_batch_size = CONFIG.get("MESSAGE_BATCH_SIZE", 4000)
			
 
				-    header_reserve = _get_max_batch_header_size(header_format_type)
			
 
				-    batches = split_content_into_batches(
			
 
				-        report_data, "wework", update_info, max_bytes=wework_batch_size - header_reserve, mode=mode
			
 
				-    )
			
 
				-
			
 
				-    # 统一添加批次头部（已预留空间，不会超限）
			
 
				-    batches = add_batch_headers(batches, header_format_type, wework_batch_size)
			
 
				-
			
 
				-    print(f"{log_prefix}消息分为 {len(batches)} 批次发送 [{report_type}]")
			
 
				-
			
 
				-    # 逐批发送
			
 
				-    for i, batch_content in enumerate(batches, 1):
			
 
				-        # 根据消息类型构建 payload
			
 
				-        if is_text_mode:
			
 
				-            # text 格式：去除 markdown 语法
			
 
				-            plain_content = strip_markdown(batch_content)
			
 
				-            payload = {"msgtype": "text", "text": {"content": plain_content}}
			
 
				-            batch_size = len(plain_content.encode("utf-8"))
			
 
				-        else:
			
 
				-            # markdown 格式：保持原样
			
 
				-            payload = {"msgtype": "markdown", "markdown": {"content": batch_content}}
			
 
				-            batch_size = len(batch_content.encode("utf-8"))
			
 
				-
			
 
				-        print(
			
 
				-            f"发送{log_prefix}第 {i}/{len(batches)} 批次，大小：{batch_size} 字节 [{report_type}]"
			
 
				-        )
			
 
				-
			
 
				-        try:
			
 
				-            response = requests.post(
			
 
				-                webhook_url, headers=headers, json=payload, proxies=proxies, timeout=30
			
 
				-            )
			
 
				-            if response.status_code == 200:
			
 
				-                result = response.json()
			
 
				-                if result.get("errcode") == 0:
			
 
				-                    print(f"{log_prefix}第 {i}/{len(batches)} 批次发送成功 [{report_type}]")
			
 
				-                    # 批次间间隔
			
 
				-                    if i < len(batches):
			
 
				-                        time.sleep(CONFIG["BATCH_SEND_INTERVAL"])
			
 
				-                else:
			
 
				-                    print(
			
 
				-                        f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}]，错误：{result.get('errmsg')}"
			
 
				-                    )
			
 
				-                    return False
			
 
				-            else:
			
 
				-                print(
			
 
				-                    f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}]，状态码：{response.status_code}"
			
 
				-                )
			
 
				-                return False
			
 
				-        except Exception as e:
			
 
				-            print(f"{log_prefix}第 {i}/{len(batches)} 批次发送出错 [{report_type}]：{e}")
			
 
				-            return False
			
 
				-
			
 
				-    print(f"{log_prefix}所有 {len(batches)} 批次发送完成 [{report_type}]")
			
 
				-    return True
			
 
				-
			
 
				-
			
 
				-def send_to_telegram(
			
 
				-    bot_token: str,
			
 
				-    chat_id: str,
			
 
				-    report_data: Dict,
			
 
				-    report_type: str,
			
 
				-    update_info: Optional[Dict] = None,
			
 
				-    proxy_url: Optional[str] = None,
			
 
				-    mode: str = "daily",
			
 
				-    account_label: str = "",
			
 
				-) -> bool:
			
 
				-    """发送到Telegram（支持分批发送）"""
			
 
				-    headers = {"Content-Type": "application/json"}
			
 
				-    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
			
 
				-
			
 
				-    proxies = None
			
 
				-    if proxy_url:
			
 
				-        proxies = {"http": proxy_url, "https": proxy_url}
			
 
				-
			
 
				-    # 日志前缀
			
 
				-    log_prefix = f"Telegram{account_label}" if account_label else "Telegram"
			
 
				-
			
 
				-    # 获取分批内容，预留批次头部空间
			
 
				-    telegram_batch_size = CONFIG.get("MESSAGE_BATCH_SIZE", 4000)
			
 
				-    header_reserve = _get_max_batch_header_size("telegram")
			
 
				-    batches = split_content_into_batches(
			
 
				-        report_data, "telegram", update_info, max_bytes=telegram_batch_size - header_reserve, mode=mode
			
 
				-    )
			
 
				-
			
 
				-    # 统一添加批次头部（已预留空间，不会超限）
			
 
				-    batches = add_batch_headers(batches, "telegram", telegram_batch_size)
			
 
				-
			
 
				-    print(f"{log_prefix}消息分为 {len(batches)} 批次发送 [{report_type}]")
			
 
				-
			
 
				-    # 逐批发送
			
 
				-    for i, batch_content in enumerate(batches, 1):
			
 
				-        batch_size = len(batch_content.encode("utf-8"))
			
 
				-        print(
			
 
				-            f"发送{log_prefix}第 {i}/{len(batches)} 批次，大小：{batch_size} 字节 [{report_type}]"
			
 
				-        )
			
 
				-
			
 
				-        payload = {
			
 
				-            "chat_id": chat_id,
			
 
				-            "text": batch_content,
			
 
				-            "parse_mode": "HTML",
			
 
				-            "disable_web_page_preview": True,
			
 
				-        }
			
 
				-
			
 
				-        try:
			
 
				-            response = requests.post(
			
 
				-                url, headers=headers, json=payload, proxies=proxies, timeout=30
			
 
				-            )
			
 
				-            if response.status_code == 200:
			
 
				-                result = response.json()
			
 
				-                if result.get("ok"):
			
 
				-                    print(f"{log_prefix}第 {i}/{len(batches)} 批次发送成功 [{report_type}]")
			
 
				-                    # 批次间间隔
			
 
				-                    if i < len(batches):
			
 
				-                        time.sleep(CONFIG["BATCH_SEND_INTERVAL"])
			
 
				-                else:
			
 
				-                    print(
			
 
				-                        f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}]，错误：{result.get('description')}"
			
 
				-                    )
			
 
				-                    return False
			
 
				-            else:
			
 
				-                print(
			
 
				-                    f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}]，状态码：{response.status_code}"
			
 
				-                )
			
 
				-                return False
			
 
				-        except Exception as e:
			
 
				-            print(f"{log_prefix}第 {i}/{len(batches)} 批次发送出错 [{report_type}]：{e}")
			
 
				-            return False
			
 
				-
			
 
				-    print(f"{log_prefix}所有 {len(batches)} 批次发送完成 [{report_type}]")
			
 
				-    return True
			
 
				-
			
 
				-
			
 
				-def send_to_email(
			
 
				-    from_email: str,
			
 
				-    password: str,
			
 
				-    to_email: str,
			
 
				-    report_type: str,
			
 
				-    html_file_path: str,
			
 
				-    custom_smtp_server: Optional[str] = None,
			
 
				-    custom_smtp_port: Optional[int] = None,
			
 
				-) -> bool:
			
 
				-    """发送邮件通知"""
			
 
				-    try:
			
 
				-        if not html_file_path or not Path(html_file_path).exists():
			
 
				-            print(f"错误：HTML文件不存在或未提供: {html_file_path}")
			
 
				-            return False
			
 
				-
			
 
				-        print(f"使用HTML文件: {html_file_path}")
			
 
				-        with open(html_file_path, "r", encoding="utf-8") as f:
			
 
				-            html_content = f.read()
			
 
				-
			
 
				-        domain = from_email.split("@")[-1].lower()
			
 
				-
			
 
				-        if custom_smtp_server and custom_smtp_port:
			
 
				-            # 使用自定义 SMTP 配置
			
 
				-            smtp_server = custom_smtp_server
			
 
				-            smtp_port = int(custom_smtp_port)
			
 
				-            # 根据端口判断加密方式：465=SSL, 587=TLS
			
 
				-            if smtp_port == 465:
			
 
				-                use_tls = False  # SSL 模式（SMTP_SSL）
			
 
				-            elif smtp_port == 587:
			
 
				-                use_tls = True   # TLS 模式（STARTTLS）
			
 
				-            else:
			
 
				-                # 其他端口优先尝试 TLS（更安全，更广泛支持）
			
 
				-                use_tls = True
			
 
				-        elif domain in SMTP_CONFIGS:
			
 
				-            # 使用预设配置
			
 
				-            config = SMTP_CONFIGS[domain]
			
 
				-            smtp_server = config["server"]
			
 
				-            smtp_port = config["port"]
			
 
				-            use_tls = config["encryption"] == "TLS"
			
 
				-        else:
			
 
				-            print(f"未识别的邮箱服务商: {domain}，使用通用 SMTP 配置")
			
 
				-            smtp_server = f"smtp.{domain}"
			
 
				-            smtp_port = 587
			
 
				-            use_tls = True
			
 
				-
			
 
				-        msg = MIMEMultipart("alternative")
			
 
				-
			
 
				-        # 严格按照 RFC 标准设置 From header
			
 
				-        sender_name = "TrendRadar"
			
 
				-        msg["From"] = formataddr((sender_name, from_email))
			
 
				-
			
 
				-        # 设置收件人
			
 
				-        recipients = [addr.strip() for addr in to_email.split(",")]
			
 
				-        if len(recipients) == 1:
			
 
				-            msg["To"] = recipients[0]
			
 
				-        else:
			
 
				-            msg["To"] = ", ".join(recipients)
			
 
				-
			
 
				-        # 设置邮件主题
			
 
				-        now = get_beijing_time()
			
 
				-        subject = f"TrendRadar 热点分析报告 - {report_type} - {now.strftime('%m月%d日 %H:%M')}"
			
 
				-        msg["Subject"] = Header(subject, "utf-8")
			
 
				-
			
 
				-        # 设置其他标准 header
			
 
				-        msg["MIME-Version"] = "1.0"
			
 
				-        msg["Date"] = formatdate(localtime=True)
			
 
				-        msg["Message-ID"] = make_msgid()
			
 
				-
			
 
				-        # 添加纯文本部分（作为备选）
			
 
				-        text_content = f"""
			
 
				-TrendRadar 热点分析报告
			
 
				-========================
			
 
				-报告类型：{report_type}
			
 
				-生成时间：{now.strftime('%Y-%m-%d %H:%M:%S')}
			
 
				-
			
 
				-请使用支持HTML的邮件客户端查看完整报告内容。
			
 
				-        """
			
 
				-        text_part = MIMEText(text_content, "plain", "utf-8")
			
 
				-        msg.attach(text_part)
			
 
				-
			
 
				-        html_part = MIMEText(html_content, "html", "utf-8")
			
 
				-        msg.attach(html_part)
			
 
				-
			
 
				-        print(f"正在发送邮件到 {to_email}...")
			
 
				-        print(f"SMTP 服务器: {smtp_server}:{smtp_port}")
			
 
				-        print(f"发件人: {from_email}")
			
 
				-
			
 
				-        try:
			
 
				-            if use_tls:
			
 
				-                # TLS 模式
			
 
				-                server = smtplib.SMTP(smtp_server, smtp_port, timeout=30)
			
 
				-                server.set_debuglevel(0)  # 设为1可以查看详细调试信息
			
 
				-                server.ehlo()
			
 
				-                server.starttls()
			
 
				-                server.ehlo()
			
 
				-            else:
			
 
				-                # SSL 模式
			
 
				-                server = smtplib.SMTP_SSL(smtp_server, smtp_port, timeout=30)
			
 
				-                server.set_debuglevel(0)
			
 
				-                server.ehlo()
			
 
				-
			
 
				-            # 登录
			
 
				-            server.login(from_email, password)
			
 
				-
			
 
				-            # 发送邮件
			
 
				-            server.send_message(msg)
			
 
				-            server.quit()
			
 
				-
			
 
				-            print(f"邮件发送成功 [{report_type}] -> {to_email}")
			
 
				-            return True
			
 
				-
			
 
				-        except smtplib.SMTPServerDisconnected:
			
 
				-            print(f"邮件发送失败：服务器意外断开连接，请检查网络或稍后重试")
			
 
				-            return False
			
 
				-
			
 
				-    except smtplib.SMTPAuthenticationError as e:
			
 
				-        print(f"邮件发送失败：认证错误，请检查邮箱和密码/授权码")
			
 
				-        print(f"详细错误: {str(e)}")
			
 
				-        return False
			
 
				-    except smtplib.SMTPRecipientsRefused as e:
			
 
				-        print(f"邮件发送失败：收件人地址被拒绝 {e}")
			
 
				-        return False
			
 
				-    except smtplib.SMTPSenderRefused as e:
			
 
				-        print(f"邮件发送失败：发件人地址被拒绝 {e}")
			
 
				-        return False
			
 
				-    except smtplib.SMTPDataError as e:
			
 
				-        print(f"邮件发送失败：邮件数据错误 {e}")
			
 
				-        return False
			
 
				-    except smtplib.SMTPConnectError as e:
			
 
				-        print(f"邮件发送失败：无法连接到 SMTP 服务器 {smtp_server}:{smtp_port}")
			
 
				-        print(f"详细错误: {str(e)}")
			
 
				-        return False
			
 
				-    except Exception as e:
			
 
				-        print(f"邮件发送失败 [{report_type}]：{e}")
			
 
				-        import traceback
			
 
				-
			
 
				-        traceback.print_exc()
			
 
				-        return False
			
 
				-
			
 
				-
			
 
				-def send_to_ntfy(
			
 
				-    server_url: str,
			
 
				-    topic: str,
			
 
				-    token: Optional[str],
			
 
				-    report_data: Dict,
			
 
				-    report_type: str,
			
 
				-    update_info: Optional[Dict] = None,
			
 
				-    proxy_url: Optional[str] = None,
			
 
				-    mode: str = "daily",
			
 
				-    account_label: str = "",
			
 
				-) -> bool:
			
 
				-    """发送到ntfy（支持分批发送，严格遵守4KB限制）"""
			
 
				-    # 日志前缀
			
 
				-    log_prefix = f"ntfy{account_label}" if account_label else "ntfy"
			
 
				-
			
 
				-    # 避免 HTTP header 编码问题
			
 
				-    report_type_en_map = {
			
 
				-        "当日汇总": "Daily Summary",
			
 
				-        "当前榜单汇总": "Current Ranking",
			
 
				-        "增量更新": "Incremental Update",
			
 
				-        "实时增量": "Realtime Incremental", 
			
 
				-        "实时当前榜单": "Realtime Current Ranking",  
			
 
				-    }
			
 
				-    report_type_en = report_type_en_map.get(report_type, "News Report") 
			
 
				-
			
 
				-    headers = {
			
 
				-        "Content-Type": "text/plain; charset=utf-8",
			
 
				-        "Markdown": "yes",
			
 
				-        "Title": report_type_en,
			
 
				-        "Priority": "default",
			
 
				-        "Tags": "news",
			
 
				-    }
			
 
				-
			
 
				-    if token:
			
 
				-        headers["Authorization"] = f"Bearer {token}"
			
 
				-
			
 
				-    # 构建完整URL，确保格式正确
			
 
				-    base_url = server_url.rstrip("/")
			
 
				-    if not base_url.startswith(("http://", "https://")):
			
 
				-        base_url = f"https://{base_url}"
			
 
				-    url = f"{base_url}/{topic}"
			
 
				-
			
 
				-    proxies = None
			
 
				-    if proxy_url:
			
 
				-        proxies = {"http": proxy_url, "https": proxy_url}
			
 
				-
			
 
				-    # 获取分批内容，使用ntfy专用的4KB限制，预留批次头部空间
			
 
				-    ntfy_batch_size = 3800
			
 
				-    header_reserve = _get_max_batch_header_size("ntfy")
			
 
				-    batches = split_content_into_batches(
			
 
				-        report_data, "ntfy", update_info, max_bytes=ntfy_batch_size - header_reserve, mode=mode
			
 
				-    )
			
 
				-
			
 
				-    # 统一添加批次头部（已预留空间，不会超限）
			
 
				-    batches = add_batch_headers(batches, "ntfy", ntfy_batch_size)
			
 
				-
			
 
				-    total_batches = len(batches)
			
 
				-    print(f"{log_prefix}消息分为 {total_batches} 批次发送 [{report_type}]")
			
 
				-
			
 
				-    # 反转批次顺序，使得在ntfy客户端显示时顺序正确
			
 
				-    # ntfy显示最新消息在上面，所以我们从最后一批开始推送
			
 
				-    reversed_batches = list(reversed(batches))
			
 
				-
			
 
				-    print(f"{log_prefix}将按反向顺序推送（最后批次先推送），确保客户端显示顺序正确")
			
 
				-
			
 
				-    # 逐批发送（反向顺序）
			
 
				-    success_count = 0
			
 
				-    for idx, batch_content in enumerate(reversed_batches, 1):
			
 
				-        # 计算正确的批次编号（用户视角的编号）
			
 
				-        actual_batch_num = total_batches - idx + 1
			
 
				-
			
 
				-        batch_size = len(batch_content.encode("utf-8"))
			
 
				-        print(
			
 
				-            f"发送{log_prefix}第 {actual_batch_num}/{total_batches} 批次（推送顺序: {idx}/{total_batches}），大小：{batch_size} 字节 [{report_type}]"
			
 
				-        )
			
 
				-
			
 
				-        # 检查消息大小，确保不超过4KB
			
 
				-        if batch_size > 4096:
			
 
				-            print(f"警告：{log_prefix}第 {actual_batch_num} 批次消息过大（{batch_size} 字节），可能被拒绝")
			
 
				-
			
 
				-        # 更新 headers 的批次标识
			
 
				-        current_headers = headers.copy()
			
 
				-        if total_batches > 1:
			
 
				-            current_headers["Title"] = (
			
 
				-                f"{report_type_en} ({actual_batch_num}/{total_batches})"
			
 
				-            )
			
 
				-
			
 
				-        try:
			
 
				-            response = requests.post(
			
 
				-                url,
			
 
				-                headers=current_headers,
			
 
				-                data=batch_content.encode("utf-8"),
			
 
				-                proxies=proxies,
			
 
				-                timeout=30,
			
 
				-            )
			
 
				-
			
 
				-            if response.status_code == 200:
			
 
				-                print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送成功 [{report_type}]")
			
 
				-                success_count += 1
			
 
				-                if idx < total_batches:
			
 
				-                    # 公共服务器建议 2-3 秒，自托管可以更短
			
 
				-                    interval = 2 if "ntfy.sh" in server_url else 1
			
 
				-                    time.sleep(interval)
			
 
				-            elif response.status_code == 429:
			
 
				-                print(
			
 
				-                    f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次速率限制 [{report_type}]，等待后重试"
			
 
				-                )
			
 
				-                time.sleep(10)  # 等待10秒后重试
			
 
				-                # 重试一次
			
 
				-                retry_response = requests.post(
			
 
				-                    url,
			
 
				-                    headers=current_headers,
			
 
				-                    data=batch_content.encode("utf-8"),
			
 
				-                    proxies=proxies,
			
 
				-                    timeout=30,
			
 
				-                )
			
 
				-                if retry_response.status_code == 200:
			
 
				-                    print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次重试成功 [{report_type}]")
			
 
				-                    success_count += 1
			
 
				-                else:
			
 
				-                    print(
			
 
				-                        f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次重试失败，状态码：{retry_response.status_code}"
			
 
				-                    )
			
 
				-            elif response.status_code == 413:
			
 
				-                print(
			
 
				-                    f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次消息过大被拒绝 [{report_type}]，消息大小：{batch_size} 字节"
			
 
				-                )
			
 
				-            else:
			
 
				-                print(
			
 
				-                    f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送失败 [{report_type}]，状态码：{response.status_code}"
			
 
				-                )
			
 
				-                try:
			
 
				-                    print(f"错误详情：{response.text}")
			
 
				-                except:
			
 
				-                    pass
			
 
				-
			
 
				-        except requests.exceptions.ConnectTimeout:
			
 
				-            print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次连接超时 [{report_type}]")
			
 
				-        except requests.exceptions.ReadTimeout:
			
 
				-            print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次读取超时 [{report_type}]")
			
 
				-        except requests.exceptions.ConnectionError as e:
			
 
				-            print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次连接错误 [{report_type}]：{e}")
			
 
				-        except Exception as e:
			
 
				-            print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送异常 [{report_type}]：{e}")
			
 
				-
			
 
				-    # 判断整体发送是否成功
			
 
				-    if success_count == total_batches:
			
 
				-        print(f"{log_prefix}所有 {total_batches} 批次发送完成 [{report_type}]")
			
 
				-        return True
			
 
				-    elif success_count > 0:
			
 
				-        print(f"{log_prefix}部分发送成功：{success_count}/{total_batches} 批次 [{report_type}]")
			
 
				-        return True  # 部分成功也视为成功
			
 
				-    else:
			
 
				-        print(f"{log_prefix}发送完全失败 [{report_type}]")
			
 
				-        return False
			
 
				-
			
 
				-
			
 
				-def send_to_bark(
			
 
				-    bark_url: str,
			
 
				-    report_data: Dict,
			
 
				-    report_type: str,
			
 
				-    update_info: Optional[Dict] = None,
			
 
				-    proxy_url: Optional[str] = None,
			
 
				-    mode: str = "daily",
			
 
				-    account_label: str = "",
			
 
				-) -> bool:
			
 
				-    """发送到Bark（支持分批发送，使用 markdown 格式）"""
			
 
				-    # 日志前缀
			
 
				-    log_prefix = f"Bark{account_label}" if account_label else "Bark"
			
 
				-
			
 
				-    proxies = None
			
 
				-    if proxy_url:
			
 
				-        proxies = {"http": proxy_url, "https": proxy_url}
			
 
				-
			
 
				-    # 解析 Bark URL，提取 device_key 和 API 端点
			
 
				-    # Bark URL 格式: https://api.day.app/device_key 或 https://bark.day.app/device_key
			
 
				-    from urllib.parse import urlparse
			
 
				-
			
 
				-    parsed_url = urlparse(bark_url)
			
 
				-    device_key = parsed_url.path.strip('/').split('/')[0] if parsed_url.path else None
			
 
				-
			
 
				-    if not device_key:
			
 
				-        print(f"{log_prefix} URL 格式错误，无法提取 device_key: {bark_url}")
			
 
				-        return False
			
 
				-
			
 
				-    # 构建正确的 API 端点
			
 
				-    api_endpoint = f"{parsed_url.scheme}://{parsed_url.netloc}/push"
			
 
				-
			
 
				-    # 获取分批内容（Bark 限制为 3600 字节以避免 413 错误），预留批次头部空间
			
 
				-    bark_batch_size = CONFIG["BARK_BATCH_SIZE"]
			
 
				-    header_reserve = _get_max_batch_header_size("bark")
			
 
				-    batches = split_content_into_batches(
			
 
				-        report_data, "bark", update_info, max_bytes=bark_batch_size - header_reserve, mode=mode
			
 
				-    )
			
 
				-
			
 
				-    # 统一添加批次头部（已预留空间，不会超限）
			
 
				-    batches = add_batch_headers(batches, "bark", bark_batch_size)
			
 
				-
			
 
				-    total_batches = len(batches)
			
 
				-    print(f"{log_prefix}消息分为 {total_batches} 批次发送 [{report_type}]")
			
 
				-
			
 
				-    # 反转批次顺序，使得在Bark客户端显示时顺序正确
			
 
				-    # Bark显示最新消息在上面，所以我们从最后一批开始推送
			
 
				-    reversed_batches = list(reversed(batches))
			
 
				-
			
 
				-    print(f"{log_prefix}将按反向顺序推送（最后批次先推送），确保客户端显示顺序正确")
			
 
				-
			
 
				-    # 逐批发送（反向顺序）
			
 
				-    success_count = 0
			
 
				-    for idx, batch_content in enumerate(reversed_batches, 1):
			
 
				-        # 计算正确的批次编号（用户视角的编号）
			
 
				-        actual_batch_num = total_batches - idx + 1
			
 
				-
			
 
				-        batch_size = len(batch_content.encode("utf-8"))
			
 
				-        print(
			
 
				-            f"发送{log_prefix}第 {actual_batch_num}/{total_batches} 批次（推送顺序: {idx}/{total_batches}），大小：{batch_size} 字节 [{report_type}]"
			
 
				-        )
			
 
				-
			
 
				-        # 检查消息大小（Bark使用APNs，限制4KB）
			
 
				-        if batch_size > 4096:
			
 
				-            print(
			
 
				-                f"警告：{log_prefix}第 {actual_batch_num}/{total_batches} 批次消息过大（{batch_size} 字节），可能被拒绝"
			
 
				-            )
			
 
				-
			
 
				-        # 构建JSON payload
			
 
				-        payload = {
			
 
				-            "title": report_type,
			
 
				-            "markdown": batch_content,
			
 
				-            "device_key": device_key,
			
 
				-            "sound": "default",
			
 
				-            "group": "TrendRadar",
			
 
				-            "action": "none",  # 点击推送跳到 APP 不弹出弹框,方便阅读
			
 
				-        }
			
 
				-
			
 
				-        try:
			
 
				-            response = requests.post(
			
 
				-                api_endpoint,
			
 
				-                json=payload,
			
 
				-                proxies=proxies,
			
 
				-                timeout=30,
			
 
				-            )
			
 
				-
			
 
				-            if response.status_code == 200:
			
 
				-                result = response.json()
			
 
				-                if result.get("code") == 200:
			
 
				-                    print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送成功 [{report_type}]")
			
 
				-                    success_count += 1
			
 
				-                    # 批次间间隔
			
 
				-                    if idx < total_batches:
			
 
				-                        time.sleep(CONFIG["BATCH_SEND_INTERVAL"])
			
 
				-                else:
			
 
				-                    print(
			
 
				-                        f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送失败 [{report_type}]，错误：{result.get('message', '未知错误')}"
			
 
				-                    )
			
 
				-            else:
			
 
				-                print(
			
 
				-                    f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送失败 [{report_type}]，状态码：{response.status_code}"
			
 
				-                )
			
 
				-                try:
			
 
				-                    print(f"错误详情：{response.text}")
			
 
				-                except:
			
 
				-                    pass
			
 
				-
			
 
				-        except requests.exceptions.ConnectTimeout:
			
 
				-            print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次连接超时 [{report_type}]")
			
 
				-        except requests.exceptions.ReadTimeout:
			
 
				-            print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次读取超时 [{report_type}]")
			
 
				-        except requests.exceptions.ConnectionError as e:
			
 
				-            print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次连接错误 [{report_type}]：{e}")
			
 
				-        except Exception as e:
			
 
				-            print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送异常 [{report_type}]：{e}")
			
 
				-
			
 
				-    # 判断整体发送是否成功
			
 
				-    if success_count == total_batches:
			
 
				-        print(f"{log_prefix}所有 {total_batches} 批次发送完成 [{report_type}]")
			
 
				-        return True
			
 
				-    elif success_count > 0:
			
 
				-        print(f"{log_prefix}部分发送成功：{success_count}/{total_batches} 批次 [{report_type}]")
			
 
				-        return True  # 部分成功也视为成功
			
 
				-    else:
			
 
				-        print(f"{log_prefix}发送完全失败 [{report_type}]")
			
 
				-        return False
			
 
				-
			
 
				-
			
 
				-def convert_markdown_to_mrkdwn(content: str) -> str:
			
 
				-    """
			
 
				-    将标准 Markdown 转换为 Slack 的 mrkdwn 格式
			
 
				-
			
 
				-    转换规则：
			
 
				-    - **粗体** → *粗体*
			
 
				-    - [文本](url) → <url|文本>
			
 
				-    - 保留其他格式（代码块、列表等）
			
 
				-    """
			
 
				-    # 1. 转换链接格式: [文本](url) → <url|文本>
			
 
				-    content = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'<\2|\1>', content)
			
 
				-
			
 
				-    # 2. 转换粗体: **文本** → *文本*
			
 
				-    content = re.sub(r'\*\*([^*]+)\*\*', r'*\1*', content)
			
 
				-
			
 
				-    return content
			
 
				-
			
 
				-
			
 
				-def send_to_slack(
			
 
				-    webhook_url: str,
			
 
				-    report_data: Dict,
			
 
				-    report_type: str,
			
 
				-    update_info: Optional[Dict] = None,
			
 
				-    proxy_url: Optional[str] = None,
			
 
				-    mode: str = "daily",
			
 
				-    account_label: str = "",
			
 
				-) -> bool:
			
 
				-    """发送到Slack（支持分批发送，使用 mrkdwn 格式）"""
			
 
				-    headers = {"Content-Type": "application/json"}
			
 
				-    proxies = None
			
 
				-    if proxy_url:
			
 
				-        proxies = {"http": proxy_url, "https": proxy_url}
			
 
				-
			
 
				-    # 日志前缀
			
 
				-    log_prefix = f"Slack{account_label}" if account_label else "Slack"
			
 
				-
			
 
				-    # 获取分批内容（使用 Slack 批次大小），预留批次头部空间
			
 
				-    slack_batch_size = CONFIG["SLACK_BATCH_SIZE"]
			
 
				-    header_reserve = _get_max_batch_header_size("slack")
			
 
				-    batches = split_content_into_batches(
			
 
				-        report_data, "slack", update_info, max_bytes=slack_batch_size - header_reserve, mode=mode
			
 
				-    )
			
 
				-
			
 
				-    # 统一添加批次头部（已预留空间，不会超限）
			
 
				-    batches = add_batch_headers(batches, "slack", slack_batch_size)
			
 
				-
			
 
				-    print(f"{log_prefix}消息分为 {len(batches)} 批次发送 [{report_type}]")
			
 
				-
			
 
				-    # 逐批发送
			
 
				-    for i, batch_content in enumerate(batches, 1):
			
 
				-        # 转换 Markdown 到 mrkdwn 格式
			
 
				-        mrkdwn_content = convert_markdown_to_mrkdwn(batch_content)
			
 
				-
			
 
				-        batch_size = len(mrkdwn_content.encode("utf-8"))
			
 
				-        print(
			
 
				-            f"发送{log_prefix}第 {i}/{len(batches)} 批次，大小：{batch_size} 字节 [{report_type}]"
			
 
				-        )
			
 
				-
			
 
				-        # 构建 Slack payload（使用简单的 text 字段，支持 mrkdwn）
			
 
				-        payload = {
			
 
				-            "text": mrkdwn_content
			
 
				-        }
			
 
				-
			
 
				-        try:
			
 
				-            response = requests.post(
			
 
				-                webhook_url, headers=headers, json=payload, proxies=proxies, timeout=30
			
 
				-            )
			
 
				-
			
 
				-            # Slack Incoming Webhooks 成功时返回 "ok" 文本
			
 
				-            if response.status_code == 200 and response.text == "ok":
			
 
				-                print(f"{log_prefix}第 {i}/{len(batches)} 批次发送成功 [{report_type}]")
			
 
				-                # 批次间间隔
			
 
				-                if i < len(batches):
			
 
				-                    time.sleep(CONFIG["BATCH_SEND_INTERVAL"])
			
 
				-            else:
			
 
				-                error_msg = response.text if response.text else f"状态码：{response.status_code}"
			
 
				-                print(
			
 
				-                    f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}]，错误：{error_msg}"
			
 
				-                )
			
 
				-                return False
			
 
				-        except Exception as e:
			
 
				-            print(f"{log_prefix}第 {i}/{len(batches)} 批次发送出错 [{report_type}]：{e}")
			
 
				-            return False
			
 
				-
			
 
				-    print(f"{log_prefix}所有 {len(batches)} 批次发送完成 [{report_type}]")
			
 
				-    return True
			
 
				-
			
 
				-
			
 
				-# === 主分析器 ===
			
 
				-class NewsAnalyzer:
			
 
				-    """新闻分析器"""
			
 
				-
			
 
				-    # 模式策略定义
			
 
				-    MODE_STRATEGIES = {
			
 
				-        "incremental": {
			
 
				-            "mode_name": "增量模式",
			
 
				-            "description": "增量模式（只关注新增新闻，无新增时不推送）",
			
 
				-            "realtime_report_type": "实时增量",
			
 
				-            "summary_report_type": "当日汇总",
			
 
				-            "should_send_realtime": True,
			
 
				-            "should_generate_summary": True,
			
 
				-            "summary_mode": "daily",
			
 
				-        },
			
 
				-        "current": {
			
 
				-            "mode_name": "当前榜单模式",
			
 
				-            "description": "当前榜单模式（当前榜单匹配新闻 + 新增新闻区域 + 按时推送）",
			
 
				-            "realtime_report_type": "实时当前榜单",
			
 
				-            "summary_report_type": "当前榜单汇总",
			
 
				-            "should_send_realtime": True,
			
 
				-            "should_generate_summary": True,
			
 
				-            "summary_mode": "current",
			
 
				-        },
			
 
				-        "daily": {
			
 
				-            "mode_name": "当日汇总模式",
			
 
				-            "description": "当日汇总模式（所有匹配新闻 + 新增新闻区域 + 按时推送）",
			
 
				-            "realtime_report_type": "",
			
 
				-            "summary_report_type": "当日汇总",
			
 
				-            "should_send_realtime": False,
			
 
				-            "should_generate_summary": True,
			
 
				-            "summary_mode": "daily",
			
 
				-        },
			
 
				-    }
			
 
				-
			
 
				-    def __init__(self):
			
 
				-        self.request_interval = CONFIG["REQUEST_INTERVAL"]
			
 
				-        self.report_mode = CONFIG["REPORT_MODE"]
			
 
				-        self.rank_threshold = CONFIG["RANK_THRESHOLD"]
			
 
				-        self.is_github_actions = os.environ.get("GITHUB_ACTIONS") == "true"
			
 
				-        self.is_docker_container = self._detect_docker_environment()
			
 
				-        self.update_info = None
			
 
				-        self.proxy_url = None
			
 
				-        self._setup_proxy()
			
 
				-        self.data_fetcher = DataFetcher(self.proxy_url)
			
 
				-
			
 
				-        if self.is_github_actions:
			
 
				-            self._check_version_update()
			
 
				-
			
 
				-    def _detect_docker_environment(self) -> bool:
			
 
				-        """检测是否运行在 Docker 容器中"""
			
 
				-        try:
			
 
				-            if os.environ.get("DOCKER_CONTAINER") == "true":
			
 
				-                return True
			
 
				-
			
 
				-            if os.path.exists("/.dockerenv"):
			
 
				-                return True
			
 
				-
			
 
				-            return False
			
 
				-        except Exception:
			
 
				-            return False
			
 
				-
			
 
				-    def _should_open_browser(self) -> bool:
			
 
				-        """判断是否应该打开浏览器"""
			
 
				-        return not self.is_github_actions and not self.is_docker_container
			
 
				-
			
 
				-    def _setup_proxy(self) -> None:
			
 
				-        """设置代理配置"""
			
 
				-        if not self.is_github_actions and CONFIG["USE_PROXY"]:
			
 
				-            self.proxy_url = CONFIG["DEFAULT_PROXY"]
			
 
				-            print("本地环境，使用代理")
			
 
				-        elif not self.is_github_actions and not CONFIG["USE_PROXY"]:
			
 
				-            print("本地环境，未启用代理")
			
 
				-        else:
			
 
				-            print("GitHub Actions环境，不使用代理")
			
 
				-
			
 
				-    def _check_version_update(self) -> None:
			
 
				-        """检查版本更新"""
			
 
				-        try:
			
 
				-            need_update, remote_version = check_version_update(
			
 
				-                VERSION, CONFIG["VERSION_CHECK_URL"], self.proxy_url
			
 
				-            )
			
 
				-
			
 
				-            if need_update and remote_version:
			
 
				-                self.update_info = {
			
 
				-                    "current_version": VERSION,
			
 
				-                    "remote_version": remote_version,
			
 
				-                }
			
 
				-                print(f"发现新版本: {remote_version} (当前: {VERSION})")
			
 
				-            else:
			
 
				-                print("版本检查完成，当前为最新版本")
			
 
				-        except Exception as e:
			
 
				-            print(f"版本检查出错: {e}")
			
 
				-
			
 
				-    def _get_mode_strategy(self) -> Dict:
			
 
				-        """获取当前模式的策略配置"""
			
 
				-        return self.MODE_STRATEGIES.get(self.report_mode, self.MODE_STRATEGIES["daily"])
			
 
				-
			
 
				-    def _has_notification_configured(self) -> bool:
			
 
				-        """检查是否配置了任何通知渠道"""
			
 
				-        return any(
			
 
				-            [
			
 
				-                CONFIG["FEISHU_WEBHOOK_URL"],
			
 
				-                CONFIG["DINGTALK_WEBHOOK_URL"],
			
 
				-                CONFIG["WEWORK_WEBHOOK_URL"],
			
 
				-                (CONFIG["TELEGRAM_BOT_TOKEN"] and CONFIG["TELEGRAM_CHAT_ID"]),
			
 
				-                (
			
 
				-                    CONFIG["EMAIL_FROM"]
			
 
				-                    and CONFIG["EMAIL_PASSWORD"]
			
 
				-                    and CONFIG["EMAIL_TO"]
			
 
				-                ),
			
 
				-                (CONFIG["NTFY_SERVER_URL"] and CONFIG["NTFY_TOPIC"]),
			
 
				-                CONFIG["BARK_URL"],
			
 
				-                CONFIG["SLACK_WEBHOOK_URL"],
			
 
				-            ]
			
 
				-        )
			
 
				-
			
 
				-    def _has_valid_content(
			
 
				-        self, stats: List[Dict], new_titles: Optional[Dict] = None
			
 
				-    ) -> bool:
			
 
				-        """检查是否有有效的新闻内容"""
			
 
				-        if self.report_mode in ["incremental", "current"]:
			
 
				-            # 增量模式和current模式下，只要stats有内容就说明有匹配的新闻
			
 
				-            return any(stat["count"] > 0 for stat in stats)
			
 
				-        else:
			
 
				-            # 当日汇总模式下，检查是否有匹配的频率词新闻或新增新闻
			
 
				-            has_matched_news = any(stat["count"] > 0 for stat in stats)
			
 
				-            has_new_news = bool(
			
 
				-                new_titles and any(len(titles) > 0 for titles in new_titles.values())
			
 
				-            )
			
 
				-            return has_matched_news or has_new_news
			
 
				-
			
 
				-    def _load_analysis_data(
			
 
				-        self,
			
 
				-    ) -> Optional[Tuple[Dict, Dict, Dict, Dict, List, List]]:
			
 
				-        """统一的数据加载和预处理，使用当前监控平台列表过滤历史数据"""
			
 
				-        try:
			
 
				-            # 获取当前配置的监控平台ID列表
			
 
				-            current_platform_ids = []
			
 
				-            for platform in CONFIG["PLATFORMS"]:
			
 
				-                current_platform_ids.append(platform["id"])
			
 
				-
			
 
				-            print(f"当前监控平台: {current_platform_ids}")
			
 
				-
			
 
				-            all_results, id_to_name, title_info = read_all_today_titles(
			
 
				-                current_platform_ids
			
 
				-            )
			
 
				-
			
 
				-            if not all_results:
			
 
				-                print("没有找到当天的数据")
			
 
				-                return None
			
 
				-
			
 
				-            total_titles = sum(len(titles) for titles in all_results.values())
			
 
				-            print(f"读取到 {total_titles} 个标题（已按当前监控平台过滤）")
			
 
				-
			
 
				-            new_titles = detect_latest_new_titles(current_platform_ids)
			
 
				-            word_groups, filter_words, global_filters = load_frequency_words()
			
 
				-
			
 
				-            return (
			
 
				-                all_results,
			
 
				-                id_to_name,
			
 
				-                title_info,
			
 
				-                new_titles,
			
 
				-                word_groups,
			
 
				-                filter_words,
			
 
				-                global_filters,
			
 
				-            )
			
 
				-        except Exception as e:
			
 
				-            print(f"数据加载失败: {e}")
			
 
				-            return None
			
 
				-
			
 
				-    def _prepare_current_title_info(self, results: Dict, time_info: str) -> Dict:
			
 
				-        """从当前抓取结果构建标题信息"""
			
 
				-        title_info = {}
			
 
				-        for source_id, titles_data in results.items():
			
 
				-            title_info[source_id] = {}
			
 
				-            for title, title_data in titles_data.items():
			
 
				-                ranks = title_data.get("ranks", [])
			
 
				-                url = title_data.get("url", "")
			
 
				-                mobile_url = title_data.get("mobileUrl", "")
			
 
				-
			
 
				-                title_info[source_id][title] = {
			
 
				-                    "first_time": time_info,
			
 
				-                    "last_time": time_info,
			
 
				-                    "count": 1,
			
 
				-                    "ranks": ranks,
			
 
				-                    "url": url,
			
 
				-                    "mobileUrl": mobile_url,
			
 
				-                }
			
 
				-        return title_info
			
 
				-
			
 
				-    def _run_analysis_pipeline(
			
 
				-        self,
			
 
				-        data_source: Dict,
			
 
				-        mode: str,
			
 
				-        title_info: Dict,
			
 
				-        new_titles: Dict,
			
 
				-        word_groups: List[Dict],
			
 
				-        filter_words: List[str],
			
 
				-        id_to_name: Dict,
			
 
				-        failed_ids: Optional[List] = None,
			
 
				-        is_daily_summary: bool = False,
			
 
				-        global_filters: Optional[List[str]] = None,
			
 
				-    ) -> Tuple[List[Dict], str]:
			
 
				-        """统一的分析流水线：数据处理 → 统计计算 → HTML生成"""
			
 
				-
			
 
				-        # 统计计算
			
 
				-        stats, total_titles = count_word_frequency(
			
 
				-            data_source,
			
 
				-            word_groups,
			
 
				-            filter_words,
			
 
				-            id_to_name,
			
 
				-            title_info,
			
 
				-            self.rank_threshold,
			
 
				-            new_titles,
			
 
				-            mode=mode,
			
 
				-            global_filters=global_filters,
			
 
				-        )
			
 
				-
			
 
				-        # HTML生成
			
 
				-        html_file = generate_html_report(
			
 
				-            stats,
			
 
				-            total_titles,
			
 
				-            failed_ids=failed_ids,
			
 
				-            new_titles=new_titles,
			
 
				-            id_to_name=id_to_name,
			
 
				-            mode=mode,
			
 
				-            is_daily_summary=is_daily_summary,
			
 
				-            update_info=self.update_info if CONFIG["SHOW_VERSION_UPDATE"] else None,
			
 
				-        )
			
 
				-
			
 
				-        return stats, html_file
			
 
				-
			
 
				-    def _send_notification_if_needed(
			
 
				-        self,
			
 
				-        stats: List[Dict],
			
 
				-        report_type: str,
			
 
				-        mode: str,
			
 
				-        failed_ids: Optional[List] = None,
			
 
				-        new_titles: Optional[Dict] = None,
			
 
				-        id_to_name: Optional[Dict] = None,
			
 
				-        html_file_path: Optional[str] = None,
			
 
				-    ) -> bool:
			
 
				-        """统一的通知发送逻辑，包含所有判断条件"""
			
 
				-        has_notification = self._has_notification_configured()
			
 
				-
			
 
				-        if (
			
 
				-            CONFIG["ENABLE_NOTIFICATION"]
			
 
				-            and has_notification
			
 
				-            and self._has_valid_content(stats, new_titles)
			
 
				-        ):
			
 
				-            send_to_notifications(
			
 
				-                stats,
			
 
				-                failed_ids or [],
			
 
				-                report_type,
			
 
				-                new_titles,
			
 
				-                id_to_name,
			
 
				-                self.update_info,
			
 
				-                self.proxy_url,
			
 
				-                mode=mode,
			
 
				-                html_file_path=html_file_path,
			
 
				-            )
			
 
				-            return True
			
 
				-        elif CONFIG["ENABLE_NOTIFICATION"] and not has_notification:
			
 
				-            print("⚠️ 警告：通知功能已启用但未配置任何通知渠道，将跳过通知发送")
			
 
				-        elif not CONFIG["ENABLE_NOTIFICATION"]:
			
 
				-            print(f"跳过{report_type}通知：通知功能已禁用")
			
 
				-        elif (
			
 
				-            CONFIG["ENABLE_NOTIFICATION"]
			
 
				-            and has_notification
			
 
				-            and not self._has_valid_content(stats, new_titles)
			
 
				-        ):
			
 
				-            mode_strategy = self._get_mode_strategy()
			
 
				-            if "实时" in report_type:
			
 
				-                print(
			
 
				-                    f"跳过实时推送通知：{mode_strategy['mode_name']}下未检测到匹配的新闻"
			
 
				-                )
			
 
				-            else:
			
 
				-                print(
			
 
				-                    f"跳过{mode_strategy['summary_report_type']}通知：未匹配到有效的新闻内容"
			
 
				-                )
			
 
				-
			
 
				-        return False
			
 
				-
			
 
				-    def _generate_summary_report(self, mode_strategy: Dict) -> Optional[str]:
			
 
				-        """生成汇总报告（带通知）"""
			
 
				-        summary_type = (
			
 
				-            "当前榜单汇总" if mode_strategy["summary_mode"] == "current" else "当日汇总"
			
 
				-        )
			
 
				-        print(f"生成{summary_type}报告...")
			
 
				-
			
 
				-        # 加载分析数据
			
 
				-        analysis_data = self._load_analysis_data()
			
 
				-        if not analysis_data:
			
 
				-            return None
			
 
				-
			
 
				-        all_results, id_to_name, title_info, new_titles, word_groups, filter_words, global_filters = (
			
 
				-            analysis_data
			
 
				-        )
			
 
				-
			
 
				-        # 运行分析流水线
			
 
				-        stats, html_file = self._run_analysis_pipeline(
			
 
				-            all_results,
			
 
				-            mode_strategy["summary_mode"],
			
 
				-            title_info,
			
 
				-            new_titles,
			
 
				-            word_groups,
			
 
				-            filter_words,
			
 
				-            id_to_name,
			
 
				-            is_daily_summary=True,
			
 
				-            global_filters=global_filters,
			
 
				-        )
			
 
				-
			
 
				-        print(f"{summary_type}报告已生成: {html_file}")
			
 
				-
			
 
				-        # 发送通知
			
 
				-        self._send_notification_if_needed(
			
 
				-            stats,
			
 
				-            mode_strategy["summary_report_type"],
			
 
				-            mode_strategy["summary_mode"],
			
 
				-            failed_ids=[],
			
 
				-            new_titles=new_titles,
			
 
				-            id_to_name=id_to_name,
			
 
				-            html_file_path=html_file,
			
 
				-        )
			
 
				-
			
 
				-        return html_file
			
 
				-
			
 
				-    def _generate_summary_html(self, mode: str = "daily") -> Optional[str]:
			
 
				-        """生成汇总HTML"""
			
 
				-        summary_type = "当前榜单汇总" if mode == "current" else "当日汇总"
			
 
				-        print(f"生成{summary_type}HTML...")
			
 
				-
			
 
				-        # 加载分析数据
			
 
				-        analysis_data = self._load_analysis_data()
			
 
				-        if not analysis_data:
			
 
				-            return None
			
 
				-
			
 
				-        all_results, id_to_name, title_info, new_titles, word_groups, filter_words, global_filters = (
			
 
				-            analysis_data
			
 
				-        )
			
 
				-
			
 
				-        # 运行分析流水线
			
 
				-        _, html_file = self._run_analysis_pipeline(
			
 
				-            all_results,
			
 
				-            mode,
			
 
				-            title_info,
			
 
				-            new_titles,
			
 
				-            word_groups,
			
 
				-            filter_words,
			
 
				-            id_to_name,
			
 
				-            is_daily_summary=True,
			
 
				-            global_filters=global_filters,
			
 
				-        )
			
 
				-
			
 
				-        print(f"{summary_type}HTML已生成: {html_file}")
			
 
				-        return html_file
			
 
				-
			
 
				-    def _initialize_and_check_config(self) -> None:
			
 
				-        """通用初始化和配置检查"""
			
 
				-        now = get_beijing_time()
			
 
				-        print(f"当前北京时间: {now.strftime('%Y-%m-%d %H:%M:%S')}")
			
 
				-
			
 
				-        if not CONFIG["ENABLE_CRAWLER"]:
			
 
				-            print("爬虫功能已禁用（ENABLE_CRAWLER=False），程序退出")
			
 
				-            return
			
 
				-
			
 
				-        has_notification = self._has_notification_configured()
			
 
				-        if not CONFIG["ENABLE_NOTIFICATION"]:
			
 
				-            print("通知功能已禁用（ENABLE_NOTIFICATION=False），将只进行数据抓取")
			
 
				-        elif not has_notification:
			
 
				-            print("未配置任何通知渠道，将只进行数据抓取，不发送通知")
			
 
				-        else:
			
 
				-            print("通知功能已启用，将发送通知")
			
 
				-
			
 
				-        mode_strategy = self._get_mode_strategy()
			
 
				-        print(f"报告模式: {self.report_mode}")
			
 
				-        print(f"运行模式: {mode_strategy['description']}")
			
 
				-
			
 
				-    def _crawl_data(self) -> Tuple[Dict, Dict, List]:
			
 
				-        """执行数据爬取"""
			
 
				-        ids = []
			
 
				-        for platform in CONFIG["PLATFORMS"]:
			
 
				-            if "name" in platform:
			
 
				-                ids.append((platform["id"], platform["name"]))
			
 
				-            else:
			
 
				-                ids.append(platform["id"])
			
 
				-
			
 
				-        print(
			
 
				-            f"配置的监控平台: {[p.get('name', p['id']) for p in CONFIG['PLATFORMS']]}"
			
 
				-        )
			
 
				-        print(f"开始爬取数据，请求间隔 {self.request_interval} 毫秒")
			
 
				-        ensure_directory_exists("output")
			
 
				-
			
 
				-        results, id_to_name, failed_ids = self.data_fetcher.crawl_websites(
			
 
				-            ids, self.request_interval
			
 
				-        )
			
 
				-
			
 
				-        title_file = save_titles_to_file(results, id_to_name, failed_ids)
			
 
				-        print(f"标题已保存到: {title_file}")
			
 
				-
			
 
				-        return results, id_to_name, failed_ids
			
 
				-
			
 
				-    def _execute_mode_strategy(
			
 
				-        self, mode_strategy: Dict, results: Dict, id_to_name: Dict, failed_ids: List
			
 
				-    ) -> Optional[str]:
			
 
				-        """执行模式特定逻辑"""
			
 
				-        # 获取当前监控平台ID列表
			
 
				-        current_platform_ids = [platform["id"] for platform in CONFIG["PLATFORMS"]]
			
 
				-
			
 
				-        new_titles = detect_latest_new_titles(current_platform_ids)
			
 
				-        time_info = Path(save_titles_to_file(results, id_to_name, failed_ids)).stem
			
 
				-        word_groups, filter_words, global_filters = load_frequency_words()
			
 
				-
			
 
				-        # current模式下，实时推送需要使用完整的历史数据来保证统计信息的完整性
			
 
				-        if self.report_mode == "current":
			
 
				-            # 加载完整的历史数据（已按当前平台过滤）
			
 
				-            analysis_data = self._load_analysis_data()
			
 
				-            if analysis_data:
			
 
				-                (
			
 
				-                    all_results,
			
 
				-                    historical_id_to_name,
			
 
				-                    historical_title_info,
			
 
				-                    historical_new_titles,
			
 
				-                    _,
			
 
				-                    _,
			
 
				-                    _,
			
 
				-                ) = analysis_data
			
 
				-
			
 
				-                print(
			
 
				-                    f"current模式：使用过滤后的历史数据，包含平台：{list(all_results.keys())}"
			
 
				-                )
			
 
				-
			
 
				-                stats, html_file = self._run_analysis_pipeline(
			
 
				-                    all_results,
			
 
				-                    self.report_mode,
			
 
				-                    historical_title_info,
			
 
				-                    historical_new_titles,
			
 
				-                    word_groups,
			
 
				-                    filter_words,
			
 
				-                    historical_id_to_name,
			
 
				-                    failed_ids=failed_ids,
			
 
				-                    global_filters=global_filters,
			
 
				-                )
			
 
				-
			
 
				-                combined_id_to_name = {**historical_id_to_name, **id_to_name}
			
 
				-
			
 
				-                print(f"HTML报告已生成: {html_file}")
			
 
				-
			
 
				-                # 发送实时通知（使用完整历史数据的统计结果）
			
 
				-                summary_html = None
			
 
				-                if mode_strategy["should_send_realtime"]:
			
 
				-                    self._send_notification_if_needed(
			
 
				-                        stats,
			
 
				-                        mode_strategy["realtime_report_type"],
			
 
				-                        self.report_mode,
			
 
				-                        failed_ids=failed_ids,
			
 
				-                        new_titles=historical_new_titles,
			
 
				-                        id_to_name=combined_id_to_name,
			
 
				-                        html_file_path=html_file,
			
 
				-                    )
			
 
				-            else:
			
 
				-                print("❌ 严重错误：无法读取刚保存的数据文件")
			
 
				-                raise RuntimeError("数据一致性检查失败：保存后立即读取失败")
			
 
				-        else:
			
 
				-            title_info = self._prepare_current_title_info(results, time_info)
			
 
				-            stats, html_file = self._run_analysis_pipeline(
			
 
				-                results,
			
 
				-                self.report_mode,
			
 
				-                title_info,
			
 
				-                new_titles,
			
 
				-                word_groups,
			
 
				-                filter_words,
			
 
				-                id_to_name,
			
 
				-                failed_ids=failed_ids,
			
 
				-                global_filters=global_filters,
			
 
				-            )
			
 
				-            print(f"HTML报告已生成: {html_file}")
			
 
				-
			
 
				-            # 发送实时通知（如果需要）
			
 
				-            summary_html = None
			
 
				-            if mode_strategy["should_send_realtime"]:
			
 
				-                self._send_notification_if_needed(
			
 
				-                    stats,
			
 
				-                    mode_strategy["realtime_report_type"],
			
 
				-                    self.report_mode,
			
 
				-                    failed_ids=failed_ids,
			
 
				-                    new_titles=new_titles,
			
 
				-                    id_to_name=id_to_name,
			
 
				-                    html_file_path=html_file,
			
 
				-                )
			
 
				-
			
 
				-        # 生成汇总报告（如果需要）
			
 
				-        summary_html = None
			
 
				-        if mode_strategy["should_generate_summary"]:
			
 
				-            if mode_strategy["should_send_realtime"]:
			
 
				-                # 如果已经发送了实时通知，汇总只生成HTML不发送通知
			
 
				-                summary_html = self._generate_summary_html(
			
 
				-                    mode_strategy["summary_mode"]
			
 
				-                )
			
 
				-            else:
			
 
				-                # daily模式：直接生成汇总报告并发送通知
			
 
				-                summary_html = self._generate_summary_report(mode_strategy)
			
 
				-
			
 
				-        # 打开浏览器（仅在非容器环境）
			
 
				-        if self._should_open_browser() and html_file:
			
 
				-            if summary_html:
			
 
				-                summary_url = "file://" + str(Path(summary_html).resolve())
			
 
				-                print(f"正在打开汇总报告: {summary_url}")
			
 
				-                webbrowser.open(summary_url)
			
 
				-            else:
			
 
				-                file_url = "file://" + str(Path(html_file).resolve())
			
 
				-                print(f"正在打开HTML报告: {file_url}")
			
 
				-                webbrowser.open(file_url)
			
 
				-        elif self.is_docker_container and html_file:
			
 
				-            if summary_html:
			
 
				-                print(f"汇总报告已生成（Docker环境）: {summary_html}")
			
 
				-            else:
			
 
				-                print(f"HTML报告已生成（Docker环境）: {html_file}")
			
 
				-
			
 
				-        return summary_html
			
 
				-
			
 
				-    def run(self) -> None:
			
 
				-        """执行分析流程"""
			
 
				-        try:
			
 
				-            self._initialize_and_check_config()
			
 
				-
			
 
				-            mode_strategy = self._get_mode_strategy()
			
 
				-
			
 
				-            results, id_to_name, failed_ids = self._crawl_data()
			
 
				-
			
 
				-            self._execute_mode_strategy(mode_strategy, results, id_to_name, failed_ids)
			
 
				-
			
 
				-        except Exception as e:
			
 
				-            print(f"分析流程执行出错: {e}")
			
 
				-            raise
			
 
				-
			
 
				-
			
 
				-def main():
			
 
				-    try:
			
 
				-        analyzer = NewsAnalyzer()
			
 
				-        analyzer.run()
			
 
				-    except FileNotFoundError as e:
			
 
				-        print(f"❌ 配置文件错误: {e}")
			
 
				-        print("\n请确保以下文件存在:")
			
 
				-        print("  • config/config.yaml")
			
 
				-        print("  • config/frequency_words.txt")
			
 
				-        print("\n参考项目文档进行正确配置")
			
 
				-    except Exception as e:
			
 
				-        print(f"❌ 程序运行错误: {e}")
			
 
				-        raise
			
 
				-
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    main()
			
--- a/mcp_server/__init__.py
+++ b/mcp_server/__init__.py
@@ -4,4 +4,4 @@ TrendRadar MCP Server
 
				 提供基于MCP协议的新闻聚合数据查询和系统管理接口。
			
 
				 """
			
 
				 
			
 
				-__version__ = "1.0.0"
			
 
				+__version__ = "1.1.0"
			
--- a/mcp_server/server.py
+++ b/mcp_server/server.py
@@ -15,6 +15,7 @@ from .tools.analytics import AnalyticsTools
 
				 from .tools.search_tools import SearchTools
			
 
				 from .tools.config_mgmt import ConfigManagementTools
			
 
				 from .tools.system import SystemManagementTools
			
 
				+from .tools.storage_sync import StorageSyncTools
			
 
				 from .utils.date_parser import DateParser
			
 
				 from .utils.errors import MCPError
			
 
				 
			
@@ -34,6 +35,7 @@ def _get_tools(project_root: Optional[str] = None):
 
				         _tools_instances['search'] = SearchTools(project_root)
			
 
				         _tools_instances['config'] = ConfigManagementTools(project_root)
			
 
				         _tools_instances['system'] = SystemManagementTools(project_root)
			
 
				+        _tools_instances['storage'] = StorageSyncTools(project_root)
			
 
				     return _tools_instances
			
 
				 
			
 
				 
			
@@ -657,6 +659,127 @@ async def trigger_crawl(
 
				     return json.dumps(result, ensure_ascii=False, indent=2)
			
 
				 
			
 
				 
			
 
				+# ==================== 存储同步工具 ====================
			
 
				+
			
 
				+@mcp.tool
			
 
				+async def sync_from_remote(
			
 
				+    days: int = 7
			
 
				+) -> str:
			
 
				+    """
			
 
				+    从远程存储拉取数据到本地
			
 
				+
			
 
				+    用于 MCP Server 等场景：爬虫存到远程云存储（如 Cloudflare R2），
			
 
				+    MCP Server 拉取到本地进行分析查询。
			
 
				+
			
 
				+    Args:
			
 
				+        days: 拉取最近 N 天的数据，默认 7 天
			
 
				+              - 0: 不拉取
			
 
				+              - 7: 拉取最近一周的数据
			
 
				+              - 30: 拉取最近一个月的数据
			
 
				+
			
 
				+    Returns:
			
 
				+        JSON格式的同步结果，包含：
			
 
				+        - success: 是否成功
			
 
				+        - synced_files: 成功同步的文件数量
			
 
				+        - synced_dates: 成功同步的日期列表
			
 
				+        - skipped_dates: 跳过的日期（本地已存在）
			
 
				+        - failed_dates: 失败的日期及错误信息
			
 
				+        - message: 操作结果描述
			
 
				+
			
 
				+    Examples:
			
 
				+        - sync_from_remote()  # 拉取最近7天
			
 
				+        - sync_from_remote(days=30)  # 拉取最近30天
			
 
				+
			
 
				+    Note:
			
 
				+        需要在 config/config.yaml 中配置远程存储（storage.remote）或设置环境变量：
			
 
				+        - S3_ENDPOINT_URL: 服务端点
			
 
				+        - S3_BUCKET_NAME: 存储桶名称
			
 
				+        - S3_ACCESS_KEY_ID: 访问密钥 ID
			
 
				+        - S3_SECRET_ACCESS_KEY: 访问密钥
			
 
				+    """
			
 
				+    tools = _get_tools()
			
 
				+    result = tools['storage'].sync_from_remote(days=days)
			
 
				+    return json.dumps(result, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+
			
 
				+@mcp.tool
			
 
				+async def get_storage_status() -> str:
			
 
				+    """
			
 
				+    获取存储配置和状态
			
 
				+
			
 
				+    查看当前存储后端配置、本地和远程存储的状态信息。
			
 
				+
			
 
				+    Returns:
			
 
				+        JSON格式的存储状态信息，包含：
			
 
				+        - backend: 当前使用的后端类型（local/remote/auto）
			
 
				+        - local: 本地存储状态
			
 
				+            - data_dir: 数据目录
			
 
				+            - retention_days: 保留天数
			
 
				+            - total_size: 总大小
			
 
				+            - date_count: 日期数量
			
 
				+            - earliest_date: 最早日期
			
 
				+            - latest_date: 最新日期
			
 
				+        - remote: 远程存储状态
			
 
				+            - configured: 是否已配置
			
 
				+            - endpoint_url: 服务端点
			
 
				+            - bucket_name: 存储桶名称
			
 
				+            - date_count: 远程日期数量
			
 
				+        - pull: 拉取配置
			
 
				+            - enabled: 是否启用自动拉取
			
 
				+            - days: 自动拉取天数
			
 
				+
			
 
				+    Examples:
			
 
				+        - get_storage_status()  # 查看所有存储状态
			
 
				+    """
			
 
				+    tools = _get_tools()
			
 
				+    result = tools['storage'].get_storage_status()
			
 
				+    return json.dumps(result, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+
			
 
				+@mcp.tool
			
 
				+async def list_available_dates(
			
 
				+    source: str = "both"
			
 
				+) -> str:
			
 
				+    """
			
 
				+    列出本地/远程可用的日期范围
			
 
				+
			
 
				+    查看本地和远程存储中有哪些日期的数据可用，
			
 
				+    帮助了解数据覆盖范围和同步状态。
			
 
				+
			
 
				+    Args:
			
 
				+        source: 数据来源，可选值：
			
 
				+            - "local": 仅列出本地可用日期
			
 
				+            - "remote": 仅列出远程可用日期
			
 
				+            - "both": 同时列出两者并进行对比（默认）
			
 
				+
			
 
				+    Returns:
			
 
				+        JSON格式的日期列表，包含：
			
 
				+        - local: 本地日期信息（如果 source 包含 local）
			
 
				+            - dates: 日期列表（按时间倒序）
			
 
				+            - count: 日期数量
			
 
				+            - earliest: 最早日期
			
 
				+            - latest: 最新日期
			
 
				+        - remote: 远程日期信息（如果 source 包含 remote）
			
 
				+            - configured: 是否已配置远程存储
			
 
				+            - dates: 日期列表
			
 
				+            - count: 日期数量
			
 
				+            - earliest: 最早日期
			
 
				+            - latest: 最新日期
			
 
				+        - comparison: 对比结果（仅当 source="both" 时）
			
 
				+            - only_local: 仅本地存在的日期
			
 
				+            - only_remote: 仅远程存在的日期
			
 
				+            - both: 两边都存在的日期
			
 
				+
			
 
				+    Examples:
			
 
				+        - list_available_dates()  # 查看本地和远程的对比
			
 
				+        - list_available_dates(source="local")  # 仅查看本地
			
 
				+        - list_available_dates(source="remote")  # 仅查看远程
			
 
				+    """
			
 
				+    tools = _get_tools()
			
 
				+    result = tools['storage'].list_available_dates(source=source)
			
 
				+    return json.dumps(result, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+
			
 
				 # ==================== 启动入口 ====================
			
 
				 
			
 
				 def run_server(
			
@@ -721,6 +844,11 @@ def run_server(
 
				     print("    11. get_current_config      - 获取当前系统配置")
			
 
				     print("    12. get_system_status       - 获取系统运行状态")
			
 
				     print("    13. trigger_crawl           - 手动触发爬取任务")
			
 
				+    print()
			
 
				+    print("    === 存储同步工具 ===")
			
 
				+    print("    14. sync_from_remote        - 从远程存储拉取数据到本地")
			
 
				+    print("    15. get_storage_status      - 获取存储配置和状态")
			
 
				+    print("    16. list_available_dates    - 列出本地/远程可用日期")
			
 
				     print("=" * 60)
			
 
				     print()
			
 
				 
			
--- a/mcp_server/services/data_service.py
+++ b/mcp_server/services/data_service.py
@@ -517,24 +517,55 @@ class DataService:
 
				         # 遍历日期文件夹
			
 
				         for date_folder in output_dir.iterdir():
			
 
				             if date_folder.is_dir() and not date_folder.name.startswith('.'):
			
 
				-                # 解析日期（格式: YYYY年MM月DD日）
			
 
				-                try:
			
 
				-                    date_match = re.match(r'(\d{4})年(\d{2})月(\d{2})日', date_folder.name)
			
 
				-                    if date_match:
			
 
				-                        folder_date = datetime(
			
 
				-                            int(date_match.group(1)),
			
 
				-                            int(date_match.group(2)),
			
 
				-                            int(date_match.group(3))
			
 
				-                        )
			
 
				-                        available_dates.append(folder_date)
			
 
				-                except Exception:
			
 
				-                    pass
			
 
				+                folder_date = self._parse_date_folder_name(date_folder.name)
			
 
				+                if folder_date:
			
 
				+                    available_dates.append(folder_date)
			
 
				 
			
 
				         if not available_dates:
			
 
				             return (None, None)
			
 
				 
			
 
				         return (min(available_dates), max(available_dates))
			
 
				 
			
 
				+    def _parse_date_folder_name(self, folder_name: str) -> Optional[datetime]:
			
 
				+        """
			
 
				+        解析日期文件夹名称（兼容中文和ISO格式）
			
 
				+
			
 
				+        支持两种格式：
			
 
				+        - 中文格式：YYYY年MM月DD日
			
 
				+        - ISO格式：YYYY-MM-DD
			
 
				+
			
 
				+        Args:
			
 
				+            folder_name: 文件夹名称
			
 
				+
			
 
				+        Returns:
			
 
				+            datetime 对象，解析失败返回 None
			
 
				+        """
			
 
				+        # 尝试中文格式：YYYY年MM月DD日
			
 
				+        chinese_match = re.match(r'(\d{4})年(\d{2})月(\d{2})日', folder_name)
			
 
				+        if chinese_match:
			
 
				+            try:
			
 
				+                return datetime(
			
 
				+                    int(chinese_match.group(1)),
			
 
				+                    int(chinese_match.group(2)),
			
 
				+                    int(chinese_match.group(3))
			
 
				+                )
			
 
				+            except ValueError:
			
 
				+                pass
			
 
				+
			
 
				+        # 尝试 ISO 格式：YYYY-MM-DD
			
 
				+        iso_match = re.match(r'(\d{4})-(\d{2})-(\d{2})', folder_name)
			
 
				+        if iso_match:
			
 
				+            try:
			
 
				+                return datetime(
			
 
				+                    int(iso_match.group(1)),
			
 
				+                    int(iso_match.group(2)),
			
 
				+                    int(iso_match.group(3))
			
 
				+                )
			
 
				+            except ValueError:
			
 
				+                pass
			
 
				+
			
 
				+        return None
			
 
				+
			
 
				     def get_system_status(self) -> Dict:
			
 
				         """
			
 
				         获取系统运行状态
			
@@ -553,26 +584,14 @@ class DataService:
 
				         if output_dir.exists():
			
 
				             # 遍历日期文件夹
			
 
				             for date_folder in output_dir.iterdir():
			
 
				-                if date_folder.is_dir():
			
 
				-                    # 解析日期
			
 
				-                    try:
			
 
				-                        date_str = date_folder.name
			
 
				-                        # 格式: YYYY年MM月DD日
			
 
				-                        date_match = re.match(r'(\d{4})年(\d{2})月(\d{2})日', date_str)
			
 
				-                        if date_match:
			
 
				-                            folder_date = datetime(
			
 
				-                                int(date_match.group(1)),
			
 
				-                                int(date_match.group(2)),
			
 
				-                                int(date_match.group(3))
			
 
				-                            )
			
 
				-
			
 
				-                            if oldest_record is None or folder_date < oldest_record:
			
 
				-                                oldest_record = folder_date
			
 
				-                            if latest_record is None or folder_date > latest_record:
			
 
				-                                latest_record = folder_date
			
 
				-
			
 
				-                    except:
			
 
				-                        pass
			
 
				+                if date_folder.is_dir() and not date_folder.name.startswith('.'):
			
 
				+                    # 解析日期（兼容中文和ISO格式）
			
 
				+                    folder_date = self._parse_date_folder_name(date_folder.name)
			
 
				+                    if folder_date:
			
 
				+                        if oldest_record is None or folder_date < oldest_record:
			
 
				+                            oldest_record = folder_date
			
 
				+                        if latest_record is None or folder_date > latest_record:
			
 
				+                            latest_record = folder_date
			
 
				 
			
 
				                     # 计算存储大小
			
 
				                     for item in date_folder.rglob("*"):
			
--- a/mcp_server/services/parser_service.py
+++ b/mcp_server/services/parser_service.py
@@ -2,9 +2,12 @@
 
				 文件解析服务
			
 
				 
			
 
				 提供txt格式新闻数据和YAML配置文件的解析功能。
			
 
				+支持从 SQLite 数据库和 TXT 文件两种数据源读取。
			
 
				 """
			
 
				 
			
 
				+import json
			
 
				 import re
			
 
				+import sqlite3
			
 
				 from pathlib import Path
			
 
				 from typing import Dict, List, Tuple, Optional
			
 
				 from datetime import datetime
			
@@ -145,17 +148,310 @@ class ParserService:
 
				 
			
 
				     def get_date_folder_name(self, date: datetime = None) -> str:
			
 
				         """
			
 
				-        获取日期文件夹名称
			
 
				+        获取日期文件夹名称（兼容中文和ISO格式）
			
 
				 
			
 
				         Args:
			
 
				             date: 日期对象，默认为今天
			
 
				 
			
 
				         Returns:
			
 
				-            文件夹名称，格式: YYYY年MM月DD日
			
 
				+            实际存在的文件夹名称，优先返回中文格式（YYYY年MM月DD日），
			
 
				+            若不存在则返回 ISO 格式（YYYY-MM-DD）
			
 
				         """
			
 
				         if date is None:
			
 
				             date = datetime.now()
			
 
				-        return date.strftime("%Y年%m月%d日")
			
 
				+        return self._find_date_folder(date)
			
 
				+
			
 
				+    def _get_date_folder_name(self, date: datetime = None) -> str:
			
 
				+        """
			
 
				+        获取日期文件夹名称（兼容中文和ISO格式）
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期对象，默认为今天
			
 
				+
			
 
				+        Returns:
			
 
				+            实际存在的文件夹名称，优先返回中文格式（YYYY年MM月DD日），
			
 
				+            若不存在则返回 ISO 格式（YYYY-MM-DD）
			
 
				+        """
			
 
				+        if date is None:
			
 
				+            date = datetime.now()
			
 
				+        return self._find_date_folder(date)
			
 
				+
			
 
				+    def _find_date_folder(self, date: datetime) -> str:
			
 
				+        """
			
 
				+        查找实际存在的日期文件夹
			
 
				+
			
 
				+        支持两种格式：
			
 
				+        - 中文格式：YYYY年MM月DD日（优先）
			
 
				+        - ISO格式：YYYY-MM-DD
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期对象
			
 
				+
			
 
				+        Returns:
			
 
				+            实际存在的文件夹名称，若都不存在则返回中文格式
			
 
				+        """
			
 
				+        output_dir = self.project_root / "output"
			
 
				+
			
 
				+        # 中文格式：YYYY年MM月DD日
			
 
				+        chinese_format = date.strftime("%Y年%m月%d日")
			
 
				+        # ISO格式：YYYY-MM-DD
			
 
				+        iso_format = date.strftime("%Y-%m-%d")
			
 
				+
			
 
				+        # 优先检查中文格式
			
 
				+        if (output_dir / chinese_format).exists():
			
 
				+            return chinese_format
			
 
				+        # 其次检查 ISO 格式
			
 
				+        if (output_dir / iso_format).exists():
			
 
				+            return iso_format
			
 
				+
			
 
				+        # 都不存在，返回中文格式（与项目现有风格一致）
			
 
				+        return chinese_format
			
 
				+
			
 
				+    def _get_sqlite_db_path(self, date: datetime = None) -> Optional[Path]:
			
 
				+        """
			
 
				+        获取 SQLite 数据库文件路径
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期对象，默认为今天
			
 
				+
			
 
				+        Returns:
			
 
				+            数据库文件路径，如果不存在则返回 None
			
 
				+        """
			
 
				+        date_folder = self._get_date_folder_name(date)
			
 
				+        db_path = self.project_root / "output" / date_folder / "news.db"
			
 
				+        if db_path.exists():
			
 
				+            return db_path
			
 
				+        return None
			
 
				+
			
 
				+    def _get_txt_folder_path(self, date: datetime = None) -> Optional[Path]:
			
 
				+        """
			
 
				+        获取 TXT 文件夹路径
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期对象，默认为今天
			
 
				+
			
 
				+        Returns:
			
 
				+            TXT 文件夹路径，如果不存在则返回 None
			
 
				+        """
			
 
				+        date_folder = self._get_date_folder_name(date)
			
 
				+        txt_path = self.project_root / "output" / date_folder / "txt"
			
 
				+        if txt_path.exists() and txt_path.is_dir():
			
 
				+            return txt_path
			
 
				+        return None
			
 
				+
			
 
				+    def _read_from_txt(
			
 
				+        self,
			
 
				+        date: datetime = None,
			
 
				+        platform_ids: Optional[List[str]] = None
			
 
				+    ) -> Optional[Tuple[Dict, Dict, Dict]]:
			
 
				+        """
			
 
				+        从 TXT 文件夹读取新闻数据
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期对象，默认为今天
			
 
				+            platform_ids: 平台ID列表，None表示所有平台
			
 
				+
			
 
				+        Returns:
			
 
				+            (all_titles, id_to_name, all_timestamps) 元组，如果不存在返回 None
			
 
				+        """
			
 
				+        txt_folder = self._get_txt_folder_path(date)
			
 
				+        if txt_folder is None:
			
 
				+            return None
			
 
				+
			
 
				+        # 获取所有 TXT 文件并按时间排序
			
 
				+        txt_files = sorted(txt_folder.glob("*.txt"))
			
 
				+        if not txt_files:
			
 
				+            return None
			
 
				+
			
 
				+        all_titles = {}
			
 
				+        id_to_name = {}
			
 
				+        all_timestamps = {}
			
 
				+
			
 
				+        for txt_file in txt_files:
			
 
				+            try:
			
 
				+                titles_by_id, file_id_to_name = self.parse_txt_file(txt_file)
			
 
				+
			
 
				+                # 记录时间戳
			
 
				+                all_timestamps[txt_file.name] = txt_file.stat().st_mtime
			
 
				+
			
 
				+                # 合并 id_to_name
			
 
				+                id_to_name.update(file_id_to_name)
			
 
				+
			
 
				+                # 合并标题数据
			
 
				+                for source_id, titles in titles_by_id.items():
			
 
				+                    # 如果指定了 platform_ids，过滤
			
 
				+                    if platform_ids and source_id not in platform_ids:
			
 
				+                        continue
			
 
				+
			
 
				+                    if source_id not in all_titles:
			
 
				+                        all_titles[source_id] = {}
			
 
				+
			
 
				+                    for title, data in titles.items():
			
 
				+                        if title not in all_titles[source_id]:
			
 
				+                            # 新标题
			
 
				+                            all_titles[source_id][title] = {
			
 
				+                                "ranks": data.get("ranks", []),
			
 
				+                                "url": data.get("url", ""),
			
 
				+                                "mobileUrl": data.get("mobileUrl", ""),
			
 
				+                                "first_time": txt_file.stem,  # 使用文件名作为时间
			
 
				+                                "last_time": txt_file.stem,
			
 
				+                                "count": 1,
			
 
				+                            }
			
 
				+                        else:
			
 
				+                            # 合并已存在的标题
			
 
				+                            existing = all_titles[source_id][title]
			
 
				+                            # 合并排名
			
 
				+                            for rank in data.get("ranks", []):
			
 
				+                                if rank not in existing["ranks"]:
			
 
				+                                    existing["ranks"].append(rank)
			
 
				+                            # 更新 last_time
			
 
				+                            existing["last_time"] = txt_file.stem
			
 
				+                            existing["count"] += 1
			
 
				+                            # 保留 URL
			
 
				+                            if not existing["url"] and data.get("url"):
			
 
				+                                existing["url"] = data["url"]
			
 
				+                            if not existing["mobileUrl"] and data.get("mobileUrl"):
			
 
				+                                existing["mobileUrl"] = data["mobileUrl"]
			
 
				+
			
 
				+            except Exception as e:
			
 
				+                print(f"Warning: 解析 TXT 文件失败 {txt_file}: {e}")
			
 
				+                continue
			
 
				+
			
 
				+        if not all_titles:
			
 
				+            return None
			
 
				+
			
 
				+        return (all_titles, id_to_name, all_timestamps)
			
 
				+
			
 
				+    def _read_from_sqlite(
			
 
				+        self,
			
 
				+        date: datetime = None,
			
 
				+        platform_ids: Optional[List[str]] = None
			
 
				+    ) -> Optional[Tuple[Dict, Dict, Dict]]:
			
 
				+        """
			
 
				+        从 SQLite 数据库读取新闻数据
			
 
				+
			
 
				+        新表结构数据已按 URL 去重，包含：
			
 
				+        - first_crawl_time: 首次抓取时间
			
 
				+        - last_crawl_time: 最后抓取时间
			
 
				+        - crawl_count: 抓取次数
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期对象，默认为今天
			
 
				+            platform_ids: 平台ID列表，None表示所有平台
			
 
				+
			
 
				+        Returns:
			
 
				+            (all_titles, id_to_name, all_timestamps) 元组，如果数据库不存在返回 None
			
 
				+        """
			
 
				+        db_path = self._get_sqlite_db_path(date)
			
 
				+        if db_path is None:
			
 
				+            return None
			
 
				+
			
 
				+        all_titles = {}
			
 
				+        id_to_name = {}
			
 
				+        all_timestamps = {}
			
 
				+
			
 
				+        try:
			
 
				+            conn = sqlite3.connect(str(db_path))
			
 
				+            conn.row_factory = sqlite3.Row
			
 
				+            cursor = conn.cursor()
			
 
				+
			
 
				+            # 检查表是否存在
			
 
				+            cursor.execute("""
			
 
				+                SELECT name FROM sqlite_master
			
 
				+                WHERE type='table' AND name='news_items'
			
 
				+            """)
			
 
				+            if not cursor.fetchone():
			
 
				+                conn.close()
			
 
				+                return None
			
 
				+
			
 
				+            # 构建查询
			
 
				+            if platform_ids:
			
 
				+                placeholders = ','.join(['?' for _ in platform_ids])
			
 
				+                query = f"""
			
 
				+                    SELECT n.id, n.platform_id, p.name as platform_name, n.title,
			
 
				+                           n.rank, n.url, n.mobile_url,
			
 
				+                           n.first_crawl_time, n.last_crawl_time, n.crawl_count
			
 
				+                    FROM news_items n
			
 
				+                    LEFT JOIN platforms p ON n.platform_id = p.id
			
 
				+                    WHERE n.platform_id IN ({placeholders})
			
 
				+                """
			
 
				+                cursor.execute(query, platform_ids)
			
 
				+            else:
			
 
				+                cursor.execute("""
			
 
				+                    SELECT n.id, n.platform_id, p.name as platform_name, n.title,
			
 
				+                           n.rank, n.url, n.mobile_url,
			
 
				+                           n.first_crawl_time, n.last_crawl_time, n.crawl_count
			
 
				+                    FROM news_items n
			
 
				+                    LEFT JOIN platforms p ON n.platform_id = p.id
			
 
				+                """)
			
 
				+
			
 
				+            rows = cursor.fetchall()
			
 
				+
			
 
				+            # 收集所有 news_item_id 用于查询历史排名
			
 
				+            news_ids = [row['id'] for row in rows]
			
 
				+            rank_history_map = {}
			
 
				+
			
 
				+            if news_ids:
			
 
				+                placeholders = ",".join("?" * len(news_ids))
			
 
				+                cursor.execute(f"""
			
 
				+                    SELECT news_item_id, rank FROM rank_history
			
 
				+                    WHERE news_item_id IN ({placeholders})
			
 
				+                    ORDER BY news_item_id, crawl_time
			
 
				+                """, news_ids)
			
 
				+                
			
 
				+                for rh_row in cursor.fetchall():
			
 
				+                    news_id = rh_row['news_item_id']
			
 
				+                    rank = rh_row['rank']
			
 
				+                    if news_id not in rank_history_map:
			
 
				+                        rank_history_map[news_id] = []
			
 
				+                    rank_history_map[news_id].append(rank)
			
 
				+
			
 
				+            for row in rows:
			
 
				+                news_id = row['id']
			
 
				+                platform_id = row['platform_id']
			
 
				+                platform_name = row['platform_name'] or platform_id
			
 
				+                title = row['title']
			
 
				+
			
 
				+                # 更新 id_to_name
			
 
				+                if platform_id not in id_to_name:
			
 
				+                    id_to_name[platform_id] = platform_name
			
 
				+
			
 
				+                # 初始化平台字典
			
 
				+                if platform_id not in all_titles:
			
 
				+                    all_titles[platform_id] = {}
			
 
				+
			
 
				+                # 获取排名历史，如果为空则使用当前排名
			
 
				+                ranks = rank_history_map.get(news_id, [row['rank']])
			
 
				+
			
 
				+                # 直接使用数据（已去重）
			
 
				+                all_titles[platform_id][title] = {
			
 
				+                    "ranks": ranks,
			
 
				+                    "url": row['url'] or "",
			
 
				+                    "mobileUrl": row['mobile_url'] or "",
			
 
				+                    "first_time": row['first_crawl_time'] or "",
			
 
				+                    "last_time": row['last_crawl_time'] or "",
			
 
				+                    "count": row['crawl_count'] or 1,
			
 
				+                }
			
 
				+
			
 
				+            # 获取抓取时间作为 timestamps
			
 
				+            cursor.execute("""
			
 
				+                SELECT crawl_time FROM crawl_records
			
 
				+                ORDER BY crawl_time
			
 
				+            """)
			
 
				+            for row in cursor.fetchall():
			
 
				+                crawl_time = row['crawl_time']
			
 
				+                all_timestamps[f"{crawl_time}.db"] = 0  # 用虚拟时间戳
			
 
				+
			
 
				+            conn.close()
			
 
				+
			
 
				+            if not all_titles:
			
 
				+                return None
			
 
				+
			
 
				+            return (all_titles, id_to_name, all_timestamps)
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"Warning: 从 SQLite 读取数据失败: {e}")
			
 
				+            return None
			
 
				 
			
 
				     def read_all_titles_for_date(
			
 
				         self,
			
@@ -163,7 +459,7 @@ class ParserService:
 
				         platform_ids: Optional[List[str]] = None
			
 
				     ) -> Tuple[Dict, Dict, Dict]:
			
 
				         """
			
 
				-        读取指定日期的所有标题文件（带缓存）
			
 
				+        读取指定日期的所有标题（带缓存）
			
 
				 
			
 
				         Args:
			
 
				             date: 日期对象，默认为今天
			
@@ -193,71 +489,23 @@ class ParserService:
 
				         if cached:
			
 
				             return cached
			
 
				 
			
 
				-        # 缓存未命中，读取文件
			
 
				-        date_folder = self.get_date_folder_name(date)
			
 
				-        txt_dir = self.project_root / "output" / date_folder / "txt"
			
 
				-
			
 
				-        if not txt_dir.exists():
			
 
				-            raise DataNotFoundError(
			
 
				-                f"未找到 {date_folder} 的数据目录",
			
 
				-                suggestion="请先运行爬虫或检查日期是否正确"
			
 
				-            )
			
 
				-
			
 
				-        all_titles = {}
			
 
				-        id_to_name = {}
			
 
				-        all_timestamps = {}
			
 
				-
			
 
				-        # 读取所有txt文件
			
 
				-        txt_files = sorted(txt_dir.glob("*.txt"))
			
 
				-
			
 
				-        if not txt_files:
			
 
				-            raise DataNotFoundError(
			
 
				-                f"{date_folder} 没有数据文件",
			
 
				-                suggestion="请等待爬虫任务完成"
			
 
				-            )
			
 
				-
			
 
				-        for txt_file in txt_files:
			
 
				-            try:
			
 
				-                titles_by_id, file_id_to_name = self.parse_txt_file(txt_file)
			
 
				-
			
 
				-                # 更新id_to_name
			
 
				-                id_to_name.update(file_id_to_name)
			
 
				-
			
 
				-                # 合并标题数据
			
 
				-                for platform_id, titles in titles_by_id.items():
			
 
				-                    # 如果指定了平台过滤
			
 
				-                    if platform_ids and platform_id not in platform_ids:
			
 
				-                        continue
			
 
				-
			
 
				-                    if platform_id not in all_titles:
			
 
				-                        all_titles[platform_id] = {}
			
 
				-
			
 
				-                    for title, info in titles.items():
			
 
				-                        if title in all_titles[platform_id]:
			
 
				-                            # 合并排名
			
 
				-                            all_titles[platform_id][title]["ranks"].extend(info["ranks"])
			
 
				-                        else:
			
 
				-                            all_titles[platform_id][title] = info.copy()
			
 
				-
			
 
				-                # 记录文件时间戳
			
 
				-                all_timestamps[txt_file.name] = txt_file.stat().st_mtime
			
 
				-
			
 
				-            except Exception as e:
			
 
				-                # 忽略单个文件的解析错误，继续处理其他文件
			
 
				-                print(f"Warning: 解析文件 {txt_file} 失败: {e}")
			
 
				-                continue
			
 
				-
			
 
				-        if not all_titles:
			
 
				-            raise DataNotFoundError(
			
 
				-                f"{date_folder} 没有有效的数据",
			
 
				-                suggestion="请检查数据文件格式或重新运行爬虫"
			
 
				-            )
			
 
				-
			
 
				-        # 缓存结果
			
 
				-        result = (all_titles, id_to_name, all_timestamps)
			
 
				-        self.cache.set(cache_key, result)
			
 
				-
			
 
				-        return result
			
 
				+        # 优先从 SQLite 读取
			
 
				+        sqlite_result = self._read_from_sqlite(date, platform_ids)
			
 
				+        if sqlite_result:
			
 
				+            self.cache.set(cache_key, sqlite_result)
			
 
				+            return sqlite_result
			
 
				+
			
 
				+        # SQLite 不存在，尝试从 TXT 读取
			
 
				+        txt_result = self._read_from_txt(date, platform_ids)
			
 
				+        if txt_result:
			
 
				+            self.cache.set(cache_key, txt_result)
			
 
				+            return txt_result
			
 
				+
			
 
				+        # 两种数据源都不存在
			
 
				+        raise DataNotFoundError(
			
 
				+            f"未找到 {date_str} 的数据",
			
 
				+            suggestion="请先运行爬虫或检查日期是否正确"
			
 
				+        )
			
 
				 
			
 
				     def parse_yaml_config(self, config_path: str = None) -> dict:
			
 
				         """
			
--- a/mcp_server/tools/analytics.py
+++ b/mcp_server/tools/analytics.py
@@ -25,7 +25,6 @@ def calculate_news_weight(news_data: Dict, rank_threshold: int = 5) -> float:
 
				     """
			
 
				     计算新闻权重（用于排序）
			
 
				 
			
 
				-    基于 main.py 的权重算法实现，综合考虑：
			
 
				     - 排名权重 (60%)：新闻在榜单中的排名
			
 
				     - 频次权重 (30%)：新闻出现的次数
			
 
				     - 热度权重 (10%)：高排名出现的比例
			
--- a/mcp_server/tools/storage_sync.py
+++ b/mcp_server/tools/storage_sync.py
@@ -0,0 +1,468 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+存储同步工具
			
 
				+
			
 
				+实现从远程存储拉取数据到本地、获取存储状态、列出可用日期等功能。
			
 
				+"""
			
 
				+
			
 
				+import os
			
 
				+import re
			
 
				+from pathlib import Path
			
 
				+from datetime import datetime, timedelta
			
 
				+from typing import Dict, List, Optional
			
 
				+
			
 
				+import yaml
			
 
				+
			
 
				+from ..utils.errors import MCPError
			
 
				+
			
 
				+
			
 
				+class StorageSyncTools:
			
 
				+    """存储同步工具类"""
			
 
				+
			
 
				+    def __init__(self, project_root: str = None):
			
 
				+        """
			
 
				+        初始化存储同步工具
			
 
				+
			
 
				+        Args:
			
 
				+            project_root: 项目根目录
			
 
				+        """
			
 
				+        if project_root:
			
 
				+            self.project_root = Path(project_root)
			
 
				+        else:
			
 
				+            current_file = Path(__file__)
			
 
				+            self.project_root = current_file.parent.parent.parent
			
 
				+
			
 
				+        self._config = None
			
 
				+        self._remote_backend = None
			
 
				+
			
 
				+    def _load_config(self) -> dict:
			
 
				+        """加载配置文件"""
			
 
				+        if self._config is None:
			
 
				+            config_path = self.project_root / "config" / "config.yaml"
			
 
				+            if config_path.exists():
			
 
				+                with open(config_path, "r", encoding="utf-8") as f:
			
 
				+                    self._config = yaml.safe_load(f)
			
 
				+            else:
			
 
				+                self._config = {}
			
 
				+        return self._config
			
 
				+
			
 
				+    def _get_storage_config(self) -> dict:
			
 
				+        """获取存储配置"""
			
 
				+        config = self._load_config()
			
 
				+        return config.get("storage", {})
			
 
				+
			
 
				+    def _get_remote_config(self) -> dict:
			
 
				+        """
			
 
				+        获取远程存储配置（合并配置文件和环境变量）
			
 
				+        """
			
 
				+        storage_config = self._get_storage_config()
			
 
				+        remote_config = storage_config.get("remote", {})
			
 
				+
			
 
				+        return {
			
 
				+            "endpoint_url": remote_config.get("endpoint_url") or os.environ.get("S3_ENDPOINT_URL", ""),
			
 
				+            "bucket_name": remote_config.get("bucket_name") or os.environ.get("S3_BUCKET_NAME", ""),
			
 
				+            "access_key_id": remote_config.get("access_key_id") or os.environ.get("S3_ACCESS_KEY_ID", ""),
			
 
				+            "secret_access_key": remote_config.get("secret_access_key") or os.environ.get("S3_SECRET_ACCESS_KEY", ""),
			
 
				+            "region": remote_config.get("region") or os.environ.get("S3_REGION", ""),
			
 
				+        }
			
 
				+
			
 
				+    def _has_remote_config(self) -> bool:
			
 
				+        """检查是否有有效的远程存储配置"""
			
 
				+        config = self._get_remote_config()
			
 
				+        return bool(
			
 
				+            config.get("bucket_name") and
			
 
				+            config.get("access_key_id") and
			
 
				+            config.get("secret_access_key") and
			
 
				+            config.get("endpoint_url")
			
 
				+        )
			
 
				+
			
 
				+    def _get_remote_backend(self):
			
 
				+        """获取远程存储后端实例"""
			
 
				+        if self._remote_backend is not None:
			
 
				+            return self._remote_backend
			
 
				+
			
 
				+        if not self._has_remote_config():
			
 
				+            return None
			
 
				+
			
 
				+        try:
			
 
				+            from trendradar.storage.remote import RemoteStorageBackend
			
 
				+
			
 
				+            remote_config = self._get_remote_config()
			
 
				+            config = self._load_config()
			
 
				+            timezone = config.get("app", {}).get("timezone", "Asia/Shanghai")
			
 
				+
			
 
				+            self._remote_backend = RemoteStorageBackend(
			
 
				+                bucket_name=remote_config["bucket_name"],
			
 
				+                access_key_id=remote_config["access_key_id"],
			
 
				+                secret_access_key=remote_config["secret_access_key"],
			
 
				+                endpoint_url=remote_config["endpoint_url"],
			
 
				+                region=remote_config.get("region", ""),
			
 
				+                timezone=timezone,
			
 
				+            )
			
 
				+            return self._remote_backend
			
 
				+        except ImportError:
			
 
				+            print("[存储同步] 远程存储后端需要安装 boto3: pip install boto3")
			
 
				+            return None
			
 
				+        except Exception as e:
			
 
				+            print(f"[存储同步] 创建远程后端失败: {e}")
			
 
				+            return None
			
 
				+
			
 
				+    def _get_local_data_dir(self) -> Path:
			
 
				+        """获取本地数据目录"""
			
 
				+        storage_config = self._get_storage_config()
			
 
				+        local_config = storage_config.get("local", {})
			
 
				+        data_dir = local_config.get("data_dir", "output")
			
 
				+        return self.project_root / data_dir
			
 
				+
			
 
				+    def _parse_date_folder_name(self, folder_name: str) -> Optional[datetime]:
			
 
				+        """
			
 
				+        解析日期文件夹名称（兼容中文和 ISO 格式）
			
 
				+
			
 
				+        支持两种格式：
			
 
				+        - 中文格式：YYYY年MM月DD日
			
 
				+        - ISO 格式：YYYY-MM-DD
			
 
				+        """
			
 
				+        # 尝试 ISO 格式
			
 
				+        iso_match = re.match(r'(\d{4})-(\d{2})-(\d{2})', folder_name)
			
 
				+        if iso_match:
			
 
				+            try:
			
 
				+                return datetime(
			
 
				+                    int(iso_match.group(1)),
			
 
				+                    int(iso_match.group(2)),
			
 
				+                    int(iso_match.group(3))
			
 
				+                )
			
 
				+            except ValueError:
			
 
				+                pass
			
 
				+
			
 
				+        # 尝试中文格式
			
 
				+        chinese_match = re.match(r'(\d{4})年(\d{2})月(\d{2})日', folder_name)
			
 
				+        if chinese_match:
			
 
				+            try:
			
 
				+                return datetime(
			
 
				+                    int(chinese_match.group(1)),
			
 
				+                    int(chinese_match.group(2)),
			
 
				+                    int(chinese_match.group(3))
			
 
				+                )
			
 
				+            except ValueError:
			
 
				+                pass
			
 
				+
			
 
				+        return None
			
 
				+
			
 
				+    def _get_local_dates(self) -> List[str]:
			
 
				+        """获取本地可用的日期列表"""
			
 
				+        local_dir = self._get_local_data_dir()
			
 
				+        dates = []
			
 
				+
			
 
				+        if not local_dir.exists():
			
 
				+            return dates
			
 
				+
			
 
				+        for item in local_dir.iterdir():
			
 
				+            if item.is_dir() and not item.name.startswith('.'):
			
 
				+                folder_date = self._parse_date_folder_name(item.name)
			
 
				+                if folder_date:
			
 
				+                    dates.append(folder_date.strftime("%Y-%m-%d"))
			
 
				+
			
 
				+        return sorted(dates, reverse=True)
			
 
				+
			
 
				+    def _calculate_dir_size(self, path: Path) -> int:
			
 
				+        """计算目录大小（字节）"""
			
 
				+        total_size = 0
			
 
				+        if path.exists():
			
 
				+            for item in path.rglob("*"):
			
 
				+                if item.is_file():
			
 
				+                    total_size += item.stat().st_size
			
 
				+        return total_size
			
 
				+
			
 
				+    def sync_from_remote(self, days: int = 7) -> Dict:
			
 
				+        """
			
 
				+        从远程存储拉取数据到本地
			
 
				+
			
 
				+        Args:
			
 
				+            days: 拉取最近 N 天的数据，默认 7 天
			
 
				+
			
 
				+        Returns:
			
 
				+            同步结果字典
			
 
				+        """
			
 
				+        try:
			
 
				+            # 检查远程配置
			
 
				+            if not self._has_remote_config():
			
 
				+                return {
			
 
				+                    "success": False,
			
 
				+                    "error": {
			
 
				+                        "code": "REMOTE_NOT_CONFIGURED",
			
 
				+                        "message": "未配置远程存储",
			
 
				+                        "suggestion": "请在 config/config.yaml 中配置 storage.remote 或设置环境变量"
			
 
				+                    }
			
 
				+                }
			
 
				+
			
 
				+            # 获取远程后端
			
 
				+            remote_backend = self._get_remote_backend()
			
 
				+            if remote_backend is None:
			
 
				+                return {
			
 
				+                    "success": False,
			
 
				+                    "error": {
			
 
				+                        "code": "REMOTE_BACKEND_FAILED",
			
 
				+                        "message": "无法创建远程存储后端",
			
 
				+                        "suggestion": "请检查远程存储配置和 boto3 是否已安装"
			
 
				+                    }
			
 
				+                }
			
 
				+
			
 
				+            # 获取本地数据目录
			
 
				+            local_dir = self._get_local_data_dir()
			
 
				+            local_dir.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+            # 获取远程可用日期
			
 
				+            remote_dates = remote_backend.list_remote_dates()
			
 
				+
			
 
				+            # 获取本地已有日期
			
 
				+            local_dates = set(self._get_local_dates())
			
 
				+
			
 
				+            # 计算需要拉取的日期（最近 N 天）
			
 
				+            from trendradar.utils.time import get_configured_time
			
 
				+            config = self._load_config()
			
 
				+            timezone = config.get("app", {}).get("timezone", "Asia/Shanghai")
			
 
				+            now = get_configured_time(timezone)
			
 
				+
			
 
				+            target_dates = []
			
 
				+            for i in range(days):
			
 
				+                date = now - timedelta(days=i)
			
 
				+                date_str = date.strftime("%Y-%m-%d")
			
 
				+                if date_str in remote_dates:
			
 
				+                    target_dates.append(date_str)
			
 
				+
			
 
				+            # 执行拉取
			
 
				+            synced_dates = []
			
 
				+            skipped_dates = []
			
 
				+            failed_dates = []
			
 
				+
			
 
				+            for date_str in target_dates:
			
 
				+                # 检查本地是否已存在
			
 
				+                if date_str in local_dates:
			
 
				+                    skipped_dates.append(date_str)
			
 
				+                    continue
			
 
				+
			
 
				+                # 拉取单个日期
			
 
				+                try:
			
 
				+                    local_date_dir = local_dir / date_str
			
 
				+                    local_db_path = local_date_dir / "news.db"
			
 
				+                    remote_key = f"news/{date_str}.db"
			
 
				+
			
 
				+                    local_date_dir.mkdir(parents=True, exist_ok=True)
			
 
				+                    remote_backend.s3_client.download_file(
			
 
				+                        remote_backend.bucket_name,
			
 
				+                        remote_key,
			
 
				+                        str(local_db_path)
			
 
				+                    )
			
 
				+                    synced_dates.append(date_str)
			
 
				+                    print(f"[存储同步] 已拉取: {date_str}")
			
 
				+                except Exception as e:
			
 
				+                    failed_dates.append({"date": date_str, "error": str(e)})
			
 
				+                    print(f"[存储同步] 拉取失败 ({date_str}): {e}")
			
 
				+
			
 
				+            return {
			
 
				+                "success": True,
			
 
				+                "synced_files": len(synced_dates),
			
 
				+                "synced_dates": synced_dates,
			
 
				+                "skipped_dates": skipped_dates,
			
 
				+                "failed_dates": failed_dates,
			
 
				+                "message": f"成功同步 {len(synced_dates)} 天数据" + (
			
 
				+                    f"，跳过 {len(skipped_dates)} 天（本地已存在）" if skipped_dates else ""
			
 
				+                ) + (
			
 
				+                    f"，失败 {len(failed_dates)} 天" if failed_dates else ""
			
 
				+                )
			
 
				+            }
			
 
				+
			
 
				+        except MCPError as e:
			
 
				+            return {
			
 
				+                "success": False,
			
 
				+                "error": e.to_dict()
			
 
				+            }
			
 
				+        except Exception as e:
			
 
				+            return {
			
 
				+                "success": False,
			
 
				+                "error": {
			
 
				+                    "code": "INTERNAL_ERROR",
			
 
				+                    "message": str(e)
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				+    def get_storage_status(self) -> Dict:
			
 
				+        """
			
 
				+        获取存储配置和状态
			
 
				+
			
 
				+        Returns:
			
 
				+            存储状态字典
			
 
				+        """
			
 
				+        try:
			
 
				+            storage_config = self._get_storage_config()
			
 
				+            config = self._load_config()
			
 
				+
			
 
				+            # 本地存储状态
			
 
				+            local_config = storage_config.get("local", {})
			
 
				+            local_dir = self._get_local_data_dir()
			
 
				+            local_size = self._calculate_dir_size(local_dir)
			
 
				+            local_dates = self._get_local_dates()
			
 
				+
			
 
				+            local_status = {
			
 
				+                "data_dir": local_config.get("data_dir", "output"),
			
 
				+                "retention_days": local_config.get("retention_days", 0),
			
 
				+                "total_size": f"{local_size / 1024 / 1024:.2f} MB",
			
 
				+                "total_size_bytes": local_size,
			
 
				+                "date_count": len(local_dates),
			
 
				+                "earliest_date": local_dates[-1] if local_dates else None,
			
 
				+                "latest_date": local_dates[0] if local_dates else None,
			
 
				+            }
			
 
				+
			
 
				+            # 远程存储状态
			
 
				+            remote_config = storage_config.get("remote", {})
			
 
				+            has_remote = self._has_remote_config()
			
 
				+
			
 
				+            remote_status = {
			
 
				+                "configured": has_remote,
			
 
				+                "retention_days": remote_config.get("retention_days", 0),
			
 
				+            }
			
 
				+
			
 
				+            if has_remote:
			
 
				+                merged_config = self._get_remote_config()
			
 
				+                # 脱敏显示
			
 
				+                endpoint = merged_config.get("endpoint_url", "")
			
 
				+                bucket = merged_config.get("bucket_name", "")
			
 
				+                remote_status["endpoint_url"] = endpoint
			
 
				+                remote_status["bucket_name"] = bucket
			
 
				+
			
 
				+                # 尝试获取远程日期列表
			
 
				+                remote_backend = self._get_remote_backend()
			
 
				+                if remote_backend:
			
 
				+                    try:
			
 
				+                        remote_dates = remote_backend.list_remote_dates()
			
 
				+                        remote_status["date_count"] = len(remote_dates)
			
 
				+                        remote_status["earliest_date"] = remote_dates[-1] if remote_dates else None
			
 
				+                        remote_status["latest_date"] = remote_dates[0] if remote_dates else None
			
 
				+                    except Exception as e:
			
 
				+                        remote_status["error"] = str(e)
			
 
				+
			
 
				+            # 拉取配置状态
			
 
				+            pull_config = storage_config.get("pull", {})
			
 
				+            pull_status = {
			
 
				+                "enabled": pull_config.get("enabled", False),
			
 
				+                "days": pull_config.get("days", 7),
			
 
				+            }
			
 
				+
			
 
				+            return {
			
 
				+                "success": True,
			
 
				+                "backend": storage_config.get("backend", "auto"),
			
 
				+                "local": local_status,
			
 
				+                "remote": remote_status,
			
 
				+                "pull": pull_status,
			
 
				+            }
			
 
				+
			
 
				+        except MCPError as e:
			
 
				+            return {
			
 
				+                "success": False,
			
 
				+                "error": e.to_dict()
			
 
				+            }
			
 
				+        except Exception as e:
			
 
				+            return {
			
 
				+                "success": False,
			
 
				+                "error": {
			
 
				+                    "code": "INTERNAL_ERROR",
			
 
				+                    "message": str(e)
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				+    def list_available_dates(self, source: str = "both") -> Dict:
			
 
				+        """
			
 
				+        列出可用的日期范围
			
 
				+
			
 
				+        Args:
			
 
				+            source: 数据来源
			
 
				+                - "local": 仅本地
			
 
				+                - "remote": 仅远程
			
 
				+                - "both": 两者都列出（默认）
			
 
				+
			
 
				+        Returns:
			
 
				+            日期列表字典
			
 
				+        """
			
 
				+        try:
			
 
				+            result = {
			
 
				+                "success": True,
			
 
				+            }
			
 
				+
			
 
				+            # 本地日期
			
 
				+            if source in ("local", "both"):
			
 
				+                local_dates = self._get_local_dates()
			
 
				+                result["local"] = {
			
 
				+                    "dates": local_dates,
			
 
				+                    "count": len(local_dates),
			
 
				+                    "earliest": local_dates[-1] if local_dates else None,
			
 
				+                    "latest": local_dates[0] if local_dates else None,
			
 
				+                }
			
 
				+
			
 
				+            # 远程日期
			
 
				+            if source in ("remote", "both"):
			
 
				+                if not self._has_remote_config():
			
 
				+                    result["remote"] = {
			
 
				+                        "configured": False,
			
 
				+                        "dates": [],
			
 
				+                        "count": 0,
			
 
				+                        "earliest": None,
			
 
				+                        "latest": None,
			
 
				+                        "error": "未配置远程存储"
			
 
				+                    }
			
 
				+                else:
			
 
				+                    remote_backend = self._get_remote_backend()
			
 
				+                    if remote_backend:
			
 
				+                        try:
			
 
				+                            remote_dates = remote_backend.list_remote_dates()
			
 
				+                            result["remote"] = {
			
 
				+                                "configured": True,
			
 
				+                                "dates": remote_dates,
			
 
				+                                "count": len(remote_dates),
			
 
				+                                "earliest": remote_dates[-1] if remote_dates else None,
			
 
				+                                "latest": remote_dates[0] if remote_dates else None,
			
 
				+                            }
			
 
				+                        except Exception as e:
			
 
				+                            result["remote"] = {
			
 
				+                                "configured": True,
			
 
				+                                "dates": [],
			
 
				+                                "count": 0,
			
 
				+                                "earliest": None,
			
 
				+                                "latest": None,
			
 
				+                                "error": str(e)
			
 
				+                            }
			
 
				+                    else:
			
 
				+                        result["remote"] = {
			
 
				+                            "configured": True,
			
 
				+                            "dates": [],
			
 
				+                            "count": 0,
			
 
				+                            "earliest": None,
			
 
				+                            "latest": None,
			
 
				+                            "error": "无法创建远程存储后端"
			
 
				+                        }
			
 
				+
			
 
				+            # 如果同时查询两者，计算差异
			
 
				+            if source == "both" and "local" in result and "remote" in result:
			
 
				+                local_set = set(result["local"]["dates"])
			
 
				+                remote_set = set(result["remote"].get("dates", []))
			
 
				+
			
 
				+                result["comparison"] = {
			
 
				+                    "only_local": sorted(list(local_set - remote_set), reverse=True),
			
 
				+                    "only_remote": sorted(list(remote_set - local_set), reverse=True),
			
 
				+                    "both": sorted(list(local_set & remote_set), reverse=True),
			
 
				+                }
			
 
				+
			
 
				+            return result
			
 
				+
			
 
				+        except MCPError as e:
			
 
				+            return {
			
 
				+                "success": False,
			
 
				+                "error": e.to_dict()
			
 
				+            }
			
 
				+        except Exception as e:
			
 
				+            return {
			
 
				+                "success": False,
			
 
				+                "error": {
			
 
				+                    "code": "INTERNAL_ERROR",
			
 
				+                    "message": str(e)
			
 
				+                }
			
 
				+            }
			
--- a/mcp_server/tools/system.py
+++ b/mcp_server/tools/system.py
@@ -87,13 +87,13 @@ class SystemManagementTools:
 
				             >>> print(result['saved_files'])
			
 
				         """
			
 
				         try:
			
 
				-            import json
			
 
				             import time
			
 
				-            import random
			
 
				-            import requests
			
 
				-            from datetime import datetime
			
 
				-            import pytz
			
 
				             import yaml
			
 
				+            from trendradar.crawler.fetcher import DataFetcher
			
 
				+            from trendradar.storage.local import LocalStorageBackend
			
 
				+            from trendradar.storage.base import convert_crawl_results_to_news_data
			
 
				+            from trendradar.utils.time import get_configured_time, format_date_folder, format_time_filename
			
 
				+            from ..services.cache_service import get_cache
			
 
				 
			
 
				             # 参数验证
			
 
				             platforms = validate_platforms(platforms)
			
@@ -129,9 +129,6 @@ class SystemManagementTools:
 
				             else:
			
 
				                 target_platforms = all_platforms
			
 
				 
			
 
				-            # 获取请求间隔
			
 
				-            request_interval = config_data.get("crawler", {}).get("request_interval", 100)
			
 
				-
			
 
				             # 构建平台ID列表
			
 
				             ids = []
			
 
				             for platform in target_platforms:
			
@@ -142,87 +139,82 @@ class SystemManagementTools:
 
				 
			
 
				             print(f"开始临时爬取，平台: {[p.get('name', p['id']) for p in target_platforms]}")
			
 
				 
			
 
				-            # 爬取数据
			
 
				-            results = {}
			
 
				-            id_to_name = {}
			
 
				-            failed_ids = []
			
 
				-
			
 
				-            for i, id_info in enumerate(ids):
			
 
				-                if isinstance(id_info, tuple):
			
 
				-                    id_value, name = id_info
			
 
				-                else:
			
 
				-                    id_value = id_info
			
 
				-                    name = id_value
			
 
				-
			
 
				-                id_to_name[id_value] = name
			
 
				-
			
 
				-                # 构建请求URL
			
 
				-                url = f"https://newsnow.busiyi.world/api/s?id={id_value}&latest"
			
 
				+            # 初始化数据获取器
			
 
				+            crawler_config = config_data.get("crawler", {})
			
 
				+            proxy_url = None
			
 
				+            if crawler_config.get("use_proxy"):
			
 
				+                proxy_url = crawler_config.get("proxy_url")
			
 
				+            
			
 
				+            fetcher = DataFetcher(proxy_url=proxy_url)
			
 
				+            request_interval = crawler_config.get("request_interval", 100)
			
 
				+
			
 
				+            # 执行爬取
			
 
				+            results, id_to_name, failed_ids = fetcher.crawl_websites(
			
 
				+                ids_list=ids,
			
 
				+                request_interval=request_interval
			
 
				+            )
			
 
				+
			
 
				+            # 获取当前时间（统一使用 trendradar 的时间工具）
			
 
				+            # 从配置中读取时区，默认为 Asia/Shanghai
			
 
				+            timezone = config_data.get("app", {}).get("timezone", "Asia/Shanghai")
			
 
				+            current_time = get_configured_time(timezone)
			
 
				+            crawl_date = format_date_folder(None, timezone)
			
 
				+            crawl_time_str = format_time_filename(timezone)
			
 
				+
			
 
				+            # 转换为标准数据模型
			
 
				+            news_data = convert_crawl_results_to_news_data(
			
 
				+                results=results,
			
 
				+                id_to_name=id_to_name,
			
 
				+                failed_ids=failed_ids,
			
 
				+                crawl_time=crawl_time_str,
			
 
				+                crawl_date=crawl_date
			
 
				+            )
			
 
				+
			
 
				+            # 初始化存储后端
			
 
				+            storage = LocalStorageBackend(
			
 
				+                data_dir=str(self.project_root / "output"),
			
 
				+                enable_txt=True,
			
 
				+                enable_html=True,
			
 
				+                timezone=timezone
			
 
				+            )
			
 
				+
			
 
				+            # 尝试持久化数据
			
 
				+            save_success = False
			
 
				+            save_error_msg = ""
			
 
				+            saved_files = {}
			
 
				+
			
 
				+            try:
			
 
				+                # 1. 保存到 SQLite (核心持久化)
			
 
				+                if storage.save_news_data(news_data):
			
 
				+                    save_success = True
			
 
				+                
			
 
				+                # 2. 如果请求保存到本地，生成 TXT/HTML 快照
			
 
				+                if save_to_local:
			
 
				+                    # 保存 TXT
			
 
				+                    txt_path = storage.save_txt_snapshot(news_data)
			
 
				+                    if txt_path:
			
 
				+                        saved_files["txt"] = txt_path
			
 
				+
			
 
				+                    # 保存 HTML (使用简化版生成器)
			
 
				+                    html_content = self._generate_simple_html(results, id_to_name, failed_ids, current_time)
			
 
				+                    html_filename = f"{crawl_time_str}.html"
			
 
				+                    html_path = storage.save_html_report(html_content, html_filename)
			
 
				+                    if html_path:
			
 
				+                        saved_files["html"] = html_path
			
 
				+
			
 
				+            except Exception as e:
			
 
				+                # 捕获所有保存错误（特别是 Docker 只读卷导致的 PermissionError）
			
 
				+                print(f"[System] 数据保存失败: {e}")
			
 
				+                save_success = False
			
 
				+                save_error_msg = str(e)
			
 
				+
			
 
				+            # 3. 清除缓存，确保下次查询获取最新数据
			
 
				+            # 即使保存失败，内存中的数据可能已经通过其他方式更新，或者是临时的
			
 
				+            get_cache().clear()
			
 
				+            print("[System] 缓存已清除")
			
 
				 
			
 
				-                headers = {
			
 
				-                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
			
 
				-                    "Accept": "application/json, text/plain, */*",
			
 
				-                    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
			
 
				-                    "Connection": "keep-alive",
			
 
				-                    "Cache-Control": "no-cache",
			
 
				-                }
			
 
				-
			
 
				-                # 重试机制
			
 
				-                max_retries = 2
			
 
				-                retries = 0
			
 
				-                success = False
			
 
				-
			
 
				-                while retries <= max_retries and not success:
			
 
				-                    try:
			
 
				-                        response = requests.get(url, headers=headers, timeout=10)
			
 
				-                        response.raise_for_status()
			
 
				-
			
 
				-                        data_text = response.text
			
 
				-                        data_json = json.loads(data_text)
			
 
				-
			
 
				-                        status = data_json.get("status", "未知")
			
 
				-                        if status not in ["success", "cache"]:
			
 
				-                            raise ValueError(f"响应状态异常: {status}")
			
 
				-
			
 
				-                        status_info = "最新数据" if status == "success" else "缓存数据"
			
 
				-                        print(f"获取 {id_value} 成功（{status_info}）")
			
 
				-
			
 
				-                        # 解析数据
			
 
				-                        results[id_value] = {}
			
 
				-                        for index, item in enumerate(data_json.get("items", []), 1):
			
 
				-                            title = item["title"]
			
 
				-                            url_link = item.get("url", "")
			
 
				-                            mobile_url = item.get("mobileUrl", "")
			
 
				-
			
 
				-                            if title in results[id_value]:
			
 
				-                                results[id_value][title]["ranks"].append(index)
			
 
				-                            else:
			
 
				-                                results[id_value][title] = {
			
 
				-                                    "ranks": [index],
			
 
				-                                    "url": url_link,
			
 
				-                                    "mobileUrl": mobile_url,
			
 
				-                                }
			
 
				-
			
 
				-                        success = True
			
 
				-
			
 
				-                    except Exception as e:
			
 
				-                        retries += 1
			
 
				-                        if retries <= max_retries:
			
 
				-                            wait_time = random.uniform(3, 5)
			
 
				-                            print(f"请求 {id_value} 失败: {e}. {wait_time:.2f}秒后重试...")
			
 
				-                            time.sleep(wait_time)
			
 
				-                        else:
			
 
				-                            print(f"请求 {id_value} 失败: {e}")
			
 
				-                            failed_ids.append(id_value)
			
 
				-
			
 
				-                # 请求间隔
			
 
				-                if i < len(ids) - 1:
			
 
				-                    actual_interval = request_interval + random.randint(-10, 20)
			
 
				-                    actual_interval = max(50, actual_interval)
			
 
				-                    time.sleep(actual_interval / 1000)
			
 
				-
			
 
				-            # 格式化返回数据
			
 
				-            news_data = []
			
 
				+            # 构建返回结果
			
 
				+            news_response_data = []
			
 
				             for platform_id, titles_data in results.items():
			
 
				                 platform_name = id_to_name.get(platform_id, platform_id)
			
 
				                 for title, info in titles_data.items():
			
@@ -230,131 +222,42 @@ class SystemManagementTools:
 
				                         "platform_id": platform_id,
			
 
				                         "platform_name": platform_name,
			
 
				                         "title": title,
			
 
				-                        "ranks": info["ranks"]
			
 
				+                        "ranks": info.get("ranks", [])
			
 
				                     }
			
 
				-
			
 
				-                    # 条件性添加 URL 字段
			
 
				                     if include_url:
			
 
				                         news_item["url"] = info.get("url", "")
			
 
				                         news_item["mobile_url"] = info.get("mobileUrl", "")
			
 
				+                    news_response_data.append(news_item)
			
 
				 
			
 
				-                    news_data.append(news_item)
			
 
				-
			
 
				-            # 获取北京时间
			
 
				-            beijing_tz = pytz.timezone("Asia/Shanghai")
			
 
				-            now = datetime.now(beijing_tz)
			
 
				-
			
 
				-            # 构建返回结果
			
 
				             result = {
			
 
				                 "success": True,
			
 
				                 "task_id": f"crawl_{int(time.time())}",
			
 
				                 "status": "completed",
			
 
				-                "crawl_time": now.strftime("%Y-%m-%d %H:%M:%S"),
			
 
				+                "crawl_time": current_time.strftime("%Y-%m-%d %H:%M:%S"),
			
 
				                 "platforms": list(results.keys()),
			
 
				-                "total_news": len(news_data),
			
 
				+                "total_news": len(news_response_data),
			
 
				                 "failed_platforms": failed_ids,
			
 
				-                "data": news_data,
			
 
				-                "saved_to_local": save_to_local
			
 
				+                "data": news_response_data,
			
 
				+                "saved_to_local": save_success and save_to_local
			
 
				             }
			
 
				 
			
 
				-            # 如果需要持久化，调用保存逻辑
			
 
				-            if save_to_local:
			
 
				-                try:
			
 
				-                    import re
			
 
				-
			
 
				-                    # 辅助函数：清理标题
			
 
				-                    def clean_title(title: str) -> str:
			
 
				-                        """清理标题中的特殊字符"""
			
 
				-                        if not isinstance(title, str):
			
 
				-                            title = str(title)
			
 
				-                        cleaned_title = title.replace("\n", " ").replace("\r", " ")
			
 
				-                        cleaned_title = re.sub(r"\s+", " ", cleaned_title)
			
 
				-                        cleaned_title = cleaned_title.strip()
			
 
				-                        return cleaned_title
			
 
				-
			
 
				-                    # 辅助函数：创建目录
			
 
				-                    def ensure_directory_exists(directory: str):
			
 
				-                        """确保目录存在"""
			
 
				-                        Path(directory).mkdir(parents=True, exist_ok=True)
			
 
				-
			
 
				-                    # 格式化日期和时间
			
 
				-                    date_folder = now.strftime("%Y年%m月%d日")
			
 
				-                    time_filename = now.strftime("%H时%M分")
			
 
				-
			
 
				-                    # 创建 txt 文件路径
			
 
				-                    txt_dir = self.project_root / "output" / date_folder / "txt"
			
 
				-                    ensure_directory_exists(str(txt_dir))
			
 
				-                    txt_file_path = txt_dir / f"{time_filename}.txt"
			
 
				-
			
 
				-                    # 创建 html 文件路径
			
 
				-                    html_dir = self.project_root / "output" / date_folder / "html"
			
 
				-                    ensure_directory_exists(str(html_dir))
			
 
				-                    html_file_path = html_dir / f"{time_filename}.html"
			
 
				-
			
 
				-                    # 保存 txt 文件（按照 main.py 的格式）
			
 
				-                    with open(txt_file_path, "w", encoding="utf-8") as f:
			
 
				-                        for id_value, title_data in results.items():
			
 
				-                            # id | name 或 id
			
 
				-                            name = id_to_name.get(id_value)
			
 
				-                            if name and name != id_value:
			
 
				-                                f.write(f"{id_value} | {name}\n")
			
 
				-                            else:
			
 
				-                                f.write(f"{id_value}\n")
			
 
				-
			
 
				-                            # 按排名排序标题
			
 
				-                            sorted_titles = []
			
 
				-                            for title, info in title_data.items():
			
 
				-                                cleaned = clean_title(title)
			
 
				-                                if isinstance(info, dict):
			
 
				-                                    ranks = info.get("ranks", [])
			
 
				-                                    url = info.get("url", "")
			
 
				-                                    mobile_url = info.get("mobileUrl", "")
			
 
				-                                else:
			
 
				-                                    ranks = info if isinstance(info, list) else []
			
 
				-                                    url = ""
			
 
				-                                    mobile_url = ""
			
 
				-
			
 
				-                                rank = ranks[0] if ranks else 1
			
 
				-                                sorted_titles.append((rank, cleaned, url, mobile_url))
			
 
				-
			
 
				-                            sorted_titles.sort(key=lambda x: x[0])
			
 
				-
			
 
				-                            for rank, cleaned, url, mobile_url in sorted_titles:
			
 
				-                                line = f"{rank}. {cleaned}"
			
 
				-                                if url:
			
 
				-                                    line += f" [URL:{url}]"
			
 
				-                                if mobile_url:
			
 
				-                                    line += f" [MOBILE:{mobile_url}]"
			
 
				-                                f.write(line + "\n")
			
 
				-
			
 
				-                            f.write("\n")
			
 
				-
			
 
				-                        if failed_ids:
			
 
				-                            f.write("==== 以下ID请求失败 ====\n")
			
 
				-                            for id_value in failed_ids:
			
 
				-                                f.write(f"{id_value}\n")
			
 
				-
			
 
				-                    # 保存 html 文件（简化版）
			
 
				-                    html_content = self._generate_simple_html(results, id_to_name, failed_ids, now)
			
 
				-                    with open(html_file_path, "w", encoding="utf-8") as f:
			
 
				-                        f.write(html_content)
			
 
				-
			
 
				-                    print(f"数据已保存到:")
			
 
				-                    print(f"  TXT: {txt_file_path}")
			
 
				-                    print(f"  HTML: {html_file_path}")
			
 
				-
			
 
				-                    result["saved_files"] = {
			
 
				-                        "txt": str(txt_file_path),
			
 
				-                        "html": str(html_file_path)
			
 
				-                    }
			
 
				-                    result["note"] = "数据已持久化到 output 文件夹"
			
 
				-
			
 
				-                except Exception as e:
			
 
				-                    print(f"保存文件失败: {e}")
			
 
				-                    result["save_error"] = str(e)
			
 
				-                    result["note"] = "爬取成功但保存失败，数据仅在内存中"
			
 
				+            if save_success:
			
 
				+                if save_to_local:
			
 
				+                    result["saved_files"] = saved_files
			
 
				+                    result["note"] = "数据已保存到 SQLite 数据库及 output 文件夹"
			
 
				+                else:
			
 
				+                    result["note"] = "数据已保存到 SQLite 数据库 (仅内存中返回结果，未生成TXT快照)"
			
 
				             else:
			
 
				-                result["note"] = "临时爬取结果，未持久化到output文件夹"
			
 
				+                # 明确告知用户保存失败
			
 
				+                result["saved_to_local"] = False
			
 
				+                result["save_error"] = save_error_msg
			
 
				+                if "Read-only file system" in save_error_msg or "Permission denied" in save_error_msg:
			
 
				+                    result["note"] = "爬取成功，但无法写入数据库（Docker只读模式）。数据仅在本次返回中有效。"
			
 
				+                else:
			
 
				+                    result["note"] = f"爬取成功但保存失败: {save_error_msg}"
			
 
				+
			
 
				+            # 清理资源
			
 
				+            storage.cleanup()
			
 
				 
			
 
				             return result
			
 
				 
			
--- a/mcp_server/utils/date_parser.py
+++ b/mcp_server/utils/date_parser.py
@@ -283,13 +283,13 @@ class DateParser:
 
				             date: datetime对象
			
 
				 
			
 
				         Returns:
			
 
				-            文件夹名称，格式: YYYY年MM月DD日
			
 
				+            文件夹名称，格式: YYYY-MM-DD
			
 
				 
			
 
				         Examples:
			
 
				             >>> DateParser.format_date_folder(datetime(2025, 10, 11))
			
 
				-            '2025年10月11日'
			
 
				+            '2025-10-11'
			
 
				         """
			
 
				-        return date.strftime("%Y年%m月%d日")
			
 
				+        return date.strftime("%Y-%m-%d")
			
 
				 
			
 
				     @staticmethod
			
 
				     def validate_date_not_future(date: datetime) -> None:
			
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 
				 [project]
			
 
				 name = "trendradar-mcp"
			
 
				-version = "1.0.3"
			
 
				+version = "1.1.0"
			
 
				 description = "TrendRadar MCP Server - 新闻热点聚合工具"
			
 
				 requires-python = ">=3.10"
			
 
				 dependencies = [
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ pytz>=2025.2,<2026.0
 
				 PyYAML>=6.0.3,<7.0.0
			
 
				 fastmcp>=2.12.0,<2.14.0
			
 
				 websockets>=13.0,<14.0
			
 
				+boto3>=1.35.0,<2.0.0
			
--- a/trendradar/__init__.py
+++ b/trendradar/__init__.py
@@ -0,0 +1,13 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+TrendRadar - 热点新闻聚合与分析工具
			
 
				+
			
 
				+使用方式:
			
 
				+  python -m trendradar        # 模块执行
			
 
				+  trendradar                  # 安装后执行
			
 
				+"""
			
 
				+
			
 
				+from trendradar.context import AppContext
			
 
				+
			
 
				+__version__ = "4.0.0"
			
 
				+__all__ = ["AppContext", "__version__"]
			
--- a/trendradar/__main__.py
+++ b/trendradar/__main__.py
@@ -0,0 +1,719 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+TrendRadar 主程序
			
 
				+
			
 
				+热点新闻聚合与分析工具
			
 
				+支持: python -m trendradar
			
 
				+"""
			
 
				+
			
 
				+import os
			
 
				+import webbrowser
			
 
				+from pathlib import Path
			
 
				+from typing import Dict, List, Tuple, Optional
			
 
				+
			
 
				+import requests
			
 
				+
			
 
				+from trendradar.context import AppContext
			
 
				+
			
 
				+# 版本号直接定义，避免循环导入
			
 
				+VERSION = "4.0.0"
			
 
				+from trendradar.core import load_config
			
 
				+from trendradar.crawler import DataFetcher
			
 
				+from trendradar.storage import convert_crawl_results_to_news_data
			
 
				+
			
 
				+
			
 
				+def check_version_update(
			
 
				+    current_version: str, version_url: str, proxy_url: Optional[str] = None
			
 
				+) -> Tuple[bool, Optional[str]]:
			
 
				+    """检查版本更新"""
			
 
				+    try:
			
 
				+        proxies = None
			
 
				+        if proxy_url:
			
 
				+            proxies = {"http": proxy_url, "https": proxy_url}
			
 
				+
			
 
				+        headers = {
			
 
				+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
			
 
				+            "Accept": "text/plain, */*",
			
 
				+            "Cache-Control": "no-cache",
			
 
				+        }
			
 
				+
			
 
				+        response = requests.get(
			
 
				+            version_url, proxies=proxies, headers=headers, timeout=10
			
 
				+        )
			
 
				+        response.raise_for_status()
			
 
				+
			
 
				+        remote_version = response.text.strip()
			
 
				+        print(f"当前版本: {current_version}, 远程版本: {remote_version}")
			
 
				+
			
 
				+        # 比较版本
			
 
				+        def parse_version(version_str):
			
 
				+            try:
			
 
				+                parts = version_str.strip().split(".")
			
 
				+                if len(parts) != 3:
			
 
				+                    raise ValueError("版本号格式不正确")
			
 
				+                return int(parts[0]), int(parts[1]), int(parts[2])
			
 
				+            except:
			
 
				+                return 0, 0, 0
			
 
				+
			
 
				+        current_tuple = parse_version(current_version)
			
 
				+        remote_tuple = parse_version(remote_version)
			
 
				+
			
 
				+        need_update = current_tuple < remote_tuple
			
 
				+        return need_update, remote_version if need_update else None
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(f"版本检查失败: {e}")
			
 
				+        return False, None
			
 
				+
			
 
				+
			
 
				+# === 主分析器 ===
			
 
				+class NewsAnalyzer:
			
 
				+    """新闻分析器"""
			
 
				+
			
 
				+    # 模式策略定义
			
 
				+    MODE_STRATEGIES = {
			
 
				+        "incremental": {
			
 
				+            "mode_name": "增量模式",
			
 
				+            "description": "增量模式（只关注新增新闻，无新增时不推送）",
			
 
				+            "realtime_report_type": "实时增量",
			
 
				+            "summary_report_type": "当日汇总",
			
 
				+            "should_send_realtime": True,
			
 
				+            "should_generate_summary": True,
			
 
				+            "summary_mode": "daily",
			
 
				+        },
			
 
				+        "current": {
			
 
				+            "mode_name": "当前榜单模式",
			
 
				+            "description": "当前榜单模式（当前榜单匹配新闻 + 新增新闻区域 + 按时推送）",
			
 
				+            "realtime_report_type": "实时当前榜单",
			
 
				+            "summary_report_type": "当前榜单汇总",
			
 
				+            "should_send_realtime": True,
			
 
				+            "should_generate_summary": True,
			
 
				+            "summary_mode": "current",
			
 
				+        },
			
 
				+        "daily": {
			
 
				+            "mode_name": "当日汇总模式",
			
 
				+            "description": "当日汇总模式（所有匹配新闻 + 新增新闻区域 + 按时推送）",
			
 
				+            "realtime_report_type": "",
			
 
				+            "summary_report_type": "当日汇总",
			
 
				+            "should_send_realtime": False,
			
 
				+            "should_generate_summary": True,
			
 
				+            "summary_mode": "daily",
			
 
				+        },
			
 
				+    }
			
 
				+
			
 
				+    def __init__(self):
			
 
				+        # 加载配置
			
 
				+        print("正在加载配置...")
			
 
				+        config = load_config()
			
 
				+        print(f"TrendRadar v{VERSION} 配置加载完成")
			
 
				+        print(f"监控平台数量: {len(config['PLATFORMS'])}")
			
 
				+        print(f"时区: {config.get('TIMEZONE', 'Asia/Shanghai')}")
			
 
				+
			
 
				+        # 创建应用上下文
			
 
				+        self.ctx = AppContext(config)
			
 
				+
			
 
				+        self.request_interval = self.ctx.config["REQUEST_INTERVAL"]
			
 
				+        self.report_mode = self.ctx.config["REPORT_MODE"]
			
 
				+        self.rank_threshold = self.ctx.rank_threshold
			
 
				+        self.is_github_actions = os.environ.get("GITHUB_ACTIONS") == "true"
			
 
				+        self.is_docker_container = self._detect_docker_environment()
			
 
				+        self.update_info = None
			
 
				+        self.proxy_url = None
			
 
				+        self._setup_proxy()
			
 
				+        self.data_fetcher = DataFetcher(self.proxy_url)
			
 
				+
			
 
				+        # 初始化存储管理器（使用 AppContext）
			
 
				+        self._init_storage_manager()
			
 
				+
			
 
				+        if self.is_github_actions:
			
 
				+            self._check_version_update()
			
 
				+
			
 
				+    def _init_storage_manager(self) -> None:
			
 
				+        """初始化存储管理器（使用 AppContext）"""
			
 
				+        # 获取数据保留天数（支持环境变量覆盖）
			
 
				+        env_retention = os.environ.get("STORAGE_RETENTION_DAYS", "").strip()
			
 
				+        if env_retention:
			
 
				+            # 环境变量覆盖配置
			
 
				+            self.ctx.config["STORAGE"]["RETENTION_DAYS"] = int(env_retention)
			
 
				+
			
 
				+        self.storage_manager = self.ctx.get_storage_manager()
			
 
				+        print(f"存储后端: {self.storage_manager.backend_name}")
			
 
				+
			
 
				+        retention_days = self.ctx.config.get("STORAGE", {}).get("RETENTION_DAYS", 0)
			
 
				+        if retention_days > 0:
			
 
				+            print(f"数据保留天数: {retention_days} 天")
			
 
				+
			
 
				+    def _detect_docker_environment(self) -> bool:
			
 
				+        """检测是否运行在 Docker 容器中"""
			
 
				+        try:
			
 
				+            if os.environ.get("DOCKER_CONTAINER") == "true":
			
 
				+                return True
			
 
				+
			
 
				+            if os.path.exists("/.dockerenv"):
			
 
				+                return True
			
 
				+
			
 
				+            return False
			
 
				+        except Exception:
			
 
				+            return False
			
 
				+
			
 
				+    def _should_open_browser(self) -> bool:
			
 
				+        """判断是否应该打开浏览器"""
			
 
				+        return not self.is_github_actions and not self.is_docker_container
			
 
				+
			
 
				+    def _setup_proxy(self) -> None:
			
 
				+        """设置代理配置"""
			
 
				+        if not self.is_github_actions and self.ctx.config["USE_PROXY"]:
			
 
				+            self.proxy_url = self.ctx.config["DEFAULT_PROXY"]
			
 
				+            print("本地环境，使用代理")
			
 
				+        elif not self.is_github_actions and not self.ctx.config["USE_PROXY"]:
			
 
				+            print("本地环境，未启用代理")
			
 
				+        else:
			
 
				+            print("GitHub Actions环境，不使用代理")
			
 
				+
			
 
				+    def _check_version_update(self) -> None:
			
 
				+        """检查版本更新"""
			
 
				+        try:
			
 
				+            need_update, remote_version = check_version_update(
			
 
				+                VERSION, self.ctx.config["VERSION_CHECK_URL"], self.proxy_url
			
 
				+            )
			
 
				+
			
 
				+            if need_update and remote_version:
			
 
				+                self.update_info = {
			
 
				+                    "current_version": VERSION,
			
 
				+                    "remote_version": remote_version,
			
 
				+                }
			
 
				+                print(f"发现新版本: {remote_version} (当前: {VERSION})")
			
 
				+            else:
			
 
				+                print("版本检查完成，当前为最新版本")
			
 
				+        except Exception as e:
			
 
				+            print(f"版本检查出错: {e}")
			
 
				+
			
 
				+    def _get_mode_strategy(self) -> Dict:
			
 
				+        """获取当前模式的策略配置"""
			
 
				+        return self.MODE_STRATEGIES.get(self.report_mode, self.MODE_STRATEGIES["daily"])
			
 
				+
			
 
				+    def _has_notification_configured(self) -> bool:
			
 
				+        """检查是否配置了任何通知渠道"""
			
 
				+        cfg = self.ctx.config
			
 
				+        return any(
			
 
				+            [
			
 
				+                cfg["FEISHU_WEBHOOK_URL"],
			
 
				+                cfg["DINGTALK_WEBHOOK_URL"],
			
 
				+                cfg["WEWORK_WEBHOOK_URL"],
			
 
				+                (cfg["TELEGRAM_BOT_TOKEN"] and cfg["TELEGRAM_CHAT_ID"]),
			
 
				+                (
			
 
				+                    cfg["EMAIL_FROM"]
			
 
				+                    and cfg["EMAIL_PASSWORD"]
			
 
				+                    and cfg["EMAIL_TO"]
			
 
				+                ),
			
 
				+                (cfg["NTFY_SERVER_URL"] and cfg["NTFY_TOPIC"]),
			
 
				+                cfg["BARK_URL"],
			
 
				+                cfg["SLACK_WEBHOOK_URL"],
			
 
				+            ]
			
 
				+        )
			
 
				+
			
 
				+    def _has_valid_content(
			
 
				+        self, stats: List[Dict], new_titles: Optional[Dict] = None
			
 
				+    ) -> bool:
			
 
				+        """检查是否有有效的新闻内容"""
			
 
				+        if self.report_mode in ["incremental", "current"]:
			
 
				+            # 增量模式和current模式下，只要stats有内容就说明有匹配的新闻
			
 
				+            return any(stat["count"] > 0 for stat in stats)
			
 
				+        else:
			
 
				+            # 当日汇总模式下，检查是否有匹配的频率词新闻或新增新闻
			
 
				+            has_matched_news = any(stat["count"] > 0 for stat in stats)
			
 
				+            has_new_news = bool(
			
 
				+                new_titles and any(len(titles) > 0 for titles in new_titles.values())
			
 
				+            )
			
 
				+            return has_matched_news or has_new_news
			
 
				+
			
 
				+    def _load_analysis_data(
			
 
				+        self,
			
 
				+    ) -> Optional[Tuple[Dict, Dict, Dict, Dict, List, List]]:
			
 
				+        """统一的数据加载和预处理，使用当前监控平台列表过滤历史数据"""
			
 
				+        try:
			
 
				+            # 获取当前配置的监控平台ID列表
			
 
				+            current_platform_ids = self.ctx.platform_ids
			
 
				+            print(f"当前监控平台: {current_platform_ids}")
			
 
				+
			
 
				+            all_results, id_to_name, title_info = self.ctx.read_today_titles(
			
 
				+                current_platform_ids
			
 
				+            )
			
 
				+
			
 
				+            if not all_results:
			
 
				+                print("没有找到当天的数据")
			
 
				+                return None
			
 
				+
			
 
				+            total_titles = sum(len(titles) for titles in all_results.values())
			
 
				+            print(f"读取到 {total_titles} 个标题（已按当前监控平台过滤）")
			
 
				+
			
 
				+            new_titles = self.ctx.detect_new_titles(current_platform_ids)
			
 
				+            word_groups, filter_words, global_filters = self.ctx.load_frequency_words()
			
 
				+
			
 
				+            return (
			
 
				+                all_results,
			
 
				+                id_to_name,
			
 
				+                title_info,
			
 
				+                new_titles,
			
 
				+                word_groups,
			
 
				+                filter_words,
			
 
				+                global_filters,
			
 
				+            )
			
 
				+        except Exception as e:
			
 
				+            print(f"数据加载失败: {e}")
			
 
				+            return None
			
 
				+
			
 
				+    def _prepare_current_title_info(self, results: Dict, time_info: str) -> Dict:
			
 
				+        """从当前抓取结果构建标题信息"""
			
 
				+        title_info = {}
			
 
				+        for source_id, titles_data in results.items():
			
 
				+            title_info[source_id] = {}
			
 
				+            for title, title_data in titles_data.items():
			
 
				+                ranks = title_data.get("ranks", [])
			
 
				+                url = title_data.get("url", "")
			
 
				+                mobile_url = title_data.get("mobileUrl", "")
			
 
				+
			
 
				+                title_info[source_id][title] = {
			
 
				+                    "first_time": time_info,
			
 
				+                    "last_time": time_info,
			
 
				+                    "count": 1,
			
 
				+                    "ranks": ranks,
			
 
				+                    "url": url,
			
 
				+                    "mobileUrl": mobile_url,
			
 
				+                }
			
 
				+        return title_info
			
 
				+
			
 
				+    def _run_analysis_pipeline(
			
 
				+        self,
			
 
				+        data_source: Dict,
			
 
				+        mode: str,
			
 
				+        title_info: Dict,
			
 
				+        new_titles: Dict,
			
 
				+        word_groups: List[Dict],
			
 
				+        filter_words: List[str],
			
 
				+        id_to_name: Dict,
			
 
				+        failed_ids: Optional[List] = None,
			
 
				+        is_daily_summary: bool = False,
			
 
				+        global_filters: Optional[List[str]] = None,
			
 
				+    ) -> Tuple[List[Dict], Optional[str]]:
			
 
				+        """统一的分析流水线：数据处理 → 统计计算 → HTML生成"""
			
 
				+
			
 
				+        # 统计计算（使用 AppContext）
			
 
				+        stats, total_titles = self.ctx.count_frequency(
			
 
				+            data_source,
			
 
				+            word_groups,
			
 
				+            filter_words,
			
 
				+            id_to_name,
			
 
				+            title_info,
			
 
				+            new_titles,
			
 
				+            mode=mode,
			
 
				+            global_filters=global_filters,
			
 
				+        )
			
 
				+
			
 
				+        # HTML生成（如果启用）
			
 
				+        html_file = None
			
 
				+        if self.ctx.config["STORAGE"]["FORMATS"]["HTML"]:
			
 
				+            html_file = self.ctx.generate_html(
			
 
				+                stats,
			
 
				+                total_titles,
			
 
				+                failed_ids=failed_ids,
			
 
				+                new_titles=new_titles,
			
 
				+                id_to_name=id_to_name,
			
 
				+                mode=mode,
			
 
				+                is_daily_summary=is_daily_summary,
			
 
				+                update_info=self.update_info if self.ctx.config["SHOW_VERSION_UPDATE"] else None,
			
 
				+            )
			
 
				+
			
 
				+        return stats, html_file
			
 
				+
			
 
				+    def _send_notification_if_needed(
			
 
				+        self,
			
 
				+        stats: List[Dict],
			
 
				+        report_type: str,
			
 
				+        mode: str,
			
 
				+        failed_ids: Optional[List] = None,
			
 
				+        new_titles: Optional[Dict] = None,
			
 
				+        id_to_name: Optional[Dict] = None,
			
 
				+        html_file_path: Optional[str] = None,
			
 
				+    ) -> bool:
			
 
				+        """统一的通知发送逻辑，包含所有判断条件"""
			
 
				+        has_notification = self._has_notification_configured()
			
 
				+        cfg = self.ctx.config
			
 
				+
			
 
				+        if (
			
 
				+            cfg["ENABLE_NOTIFICATION"]
			
 
				+            and has_notification
			
 
				+            and self._has_valid_content(stats, new_titles)
			
 
				+        ):
			
 
				+            # 推送窗口控制
			
 
				+            if cfg["PUSH_WINDOW"]["ENABLED"]:
			
 
				+                push_manager = self.ctx.create_push_manager()
			
 
				+                time_range_start = cfg["PUSH_WINDOW"]["TIME_RANGE"]["START"]
			
 
				+                time_range_end = cfg["PUSH_WINDOW"]["TIME_RANGE"]["END"]
			
 
				+
			
 
				+                if not push_manager.is_in_time_range(time_range_start, time_range_end):
			
 
				+                    now = self.ctx.get_time()
			
 
				+                    print(
			
 
				+                        f"推送窗口控制：当前时间 {now.strftime('%H:%M')} 不在推送时间窗口 {time_range_start}-{time_range_end} 内，跳过推送"
			
 
				+                    )
			
 
				+                    return False
			
 
				+
			
 
				+                if cfg["PUSH_WINDOW"]["ONCE_PER_DAY"]:
			
 
				+                    if push_manager.has_pushed_today():
			
 
				+                        print(f"推送窗口控制：今天已推送过，跳过本次推送")
			
 
				+                        return False
			
 
				+                    else:
			
 
				+                        print(f"推送窗口控制：今天首次推送")
			
 
				+
			
 
				+            # 准备报告数据
			
 
				+            report_data = self.ctx.prepare_report(stats, failed_ids, new_titles, id_to_name, mode)
			
 
				+
			
 
				+            # 是否发送版本更新信息
			
 
				+            update_info_to_send = self.update_info if cfg["SHOW_VERSION_UPDATE"] else None
			
 
				+
			
 
				+            # 使用 NotificationDispatcher 发送到所有渠道
			
 
				+            dispatcher = self.ctx.create_notification_dispatcher()
			
 
				+            results = dispatcher.dispatch_all(
			
 
				+                report_data=report_data,
			
 
				+                report_type=report_type,
			
 
				+                update_info=update_info_to_send,
			
 
				+                proxy_url=self.proxy_url,
			
 
				+                mode=mode,
			
 
				+                html_file_path=html_file_path,
			
 
				+            )
			
 
				+
			
 
				+            if not results:
			
 
				+                print("未配置任何通知渠道，跳过通知发送")
			
 
				+                return False
			
 
				+
			
 
				+            # 如果成功发送了任何通知，且启用了每天只推一次，则记录推送
			
 
				+            if (
			
 
				+                cfg["PUSH_WINDOW"]["ENABLED"]
			
 
				+                and cfg["PUSH_WINDOW"]["ONCE_PER_DAY"]
			
 
				+                and any(results.values())
			
 
				+            ):
			
 
				+                push_manager = self.ctx.create_push_manager()
			
 
				+                push_manager.record_push(report_type)
			
 
				+
			
 
				+            return True
			
 
				+
			
 
				+        elif cfg["ENABLE_NOTIFICATION"] and not has_notification:
			
 
				+            print("⚠️ 警告：通知功能已启用但未配置任何通知渠道，将跳过通知发送")
			
 
				+        elif not cfg["ENABLE_NOTIFICATION"]:
			
 
				+            print(f"跳过{report_type}通知：通知功能已禁用")
			
 
				+        elif (
			
 
				+            cfg["ENABLE_NOTIFICATION"]
			
 
				+            and has_notification
			
 
				+            and not self._has_valid_content(stats, new_titles)
			
 
				+        ):
			
 
				+            mode_strategy = self._get_mode_strategy()
			
 
				+            if "实时" in report_type:
			
 
				+                print(
			
 
				+                    f"跳过实时推送通知：{mode_strategy['mode_name']}下未检测到匹配的新闻"
			
 
				+                )
			
 
				+            else:
			
 
				+                print(
			
 
				+                    f"跳过{mode_strategy['summary_report_type']}通知：未匹配到有效的新闻内容"
			
 
				+                )
			
 
				+
			
 
				+        return False
			
 
				+
			
 
				+    def _generate_summary_report(self, mode_strategy: Dict) -> Optional[str]:
			
 
				+        """生成汇总报告（带通知）"""
			
 
				+        summary_type = (
			
 
				+            "当前榜单汇总" if mode_strategy["summary_mode"] == "current" else "当日汇总"
			
 
				+        )
			
 
				+        print(f"生成{summary_type}报告...")
			
 
				+
			
 
				+        # 加载分析数据
			
 
				+        analysis_data = self._load_analysis_data()
			
 
				+        if not analysis_data:
			
 
				+            return None
			
 
				+
			
 
				+        all_results, id_to_name, title_info, new_titles, word_groups, filter_words, global_filters = (
			
 
				+            analysis_data
			
 
				+        )
			
 
				+
			
 
				+        # 运行分析流水线
			
 
				+        stats, html_file = self._run_analysis_pipeline(
			
 
				+            all_results,
			
 
				+            mode_strategy["summary_mode"],
			
 
				+            title_info,
			
 
				+            new_titles,
			
 
				+            word_groups,
			
 
				+            filter_words,
			
 
				+            id_to_name,
			
 
				+            is_daily_summary=True,
			
 
				+            global_filters=global_filters,
			
 
				+        )
			
 
				+
			
 
				+        if html_file:
			
 
				+            print(f"{summary_type}报告已生成: {html_file}")
			
 
				+
			
 
				+        # 发送通知
			
 
				+        self._send_notification_if_needed(
			
 
				+            stats,
			
 
				+            mode_strategy["summary_report_type"],
			
 
				+            mode_strategy["summary_mode"],
			
 
				+            failed_ids=[],
			
 
				+            new_titles=new_titles,
			
 
				+            id_to_name=id_to_name,
			
 
				+            html_file_path=html_file,
			
 
				+        )
			
 
				+
			
 
				+        return html_file
			
 
				+
			
 
				+    def _generate_summary_html(self, mode: str = "daily") -> Optional[str]:
			
 
				+        """生成汇总HTML"""
			
 
				+        summary_type = "当前榜单汇总" if mode == "current" else "当日汇总"
			
 
				+        print(f"生成{summary_type}HTML...")
			
 
				+
			
 
				+        # 加载分析数据
			
 
				+        analysis_data = self._load_analysis_data()
			
 
				+        if not analysis_data:
			
 
				+            return None
			
 
				+
			
 
				+        all_results, id_to_name, title_info, new_titles, word_groups, filter_words, global_filters = (
			
 
				+            analysis_data
			
 
				+        )
			
 
				+
			
 
				+        # 运行分析流水线
			
 
				+        _, html_file = self._run_analysis_pipeline(
			
 
				+            all_results,
			
 
				+            mode,
			
 
				+            title_info,
			
 
				+            new_titles,
			
 
				+            word_groups,
			
 
				+            filter_words,
			
 
				+            id_to_name,
			
 
				+            is_daily_summary=True,
			
 
				+            global_filters=global_filters,
			
 
				+        )
			
 
				+
			
 
				+        if html_file:
			
 
				+            print(f"{summary_type}HTML已生成: {html_file}")
			
 
				+        return html_file
			
 
				+
			
 
				+    def _initialize_and_check_config(self) -> None:
			
 
				+        """通用初始化和配置检查"""
			
 
				+        now = self.ctx.get_time()
			
 
				+        print(f"当前北京时间: {now.strftime('%Y-%m-%d %H:%M:%S')}")
			
 
				+
			
 
				+        if not self.ctx.config["ENABLE_CRAWLER"]:
			
 
				+            print("爬虫功能已禁用（ENABLE_CRAWLER=False），程序退出")
			
 
				+            return
			
 
				+
			
 
				+        has_notification = self._has_notification_configured()
			
 
				+        if not self.ctx.config["ENABLE_NOTIFICATION"]:
			
 
				+            print("通知功能已禁用（ENABLE_NOTIFICATION=False），将只进行数据抓取")
			
 
				+        elif not has_notification:
			
 
				+            print("未配置任何通知渠道，将只进行数据抓取，不发送通知")
			
 
				+        else:
			
 
				+            print("通知功能已启用，将发送通知")
			
 
				+
			
 
				+        mode_strategy = self._get_mode_strategy()
			
 
				+        print(f"报告模式: {self.report_mode}")
			
 
				+        print(f"运行模式: {mode_strategy['description']}")
			
 
				+
			
 
				+    def _crawl_data(self) -> Tuple[Dict, Dict, List]:
			
 
				+        """执行数据爬取"""
			
 
				+        ids = []
			
 
				+        for platform in self.ctx.platforms:
			
 
				+            if "name" in platform:
			
 
				+                ids.append((platform["id"], platform["name"]))
			
 
				+            else:
			
 
				+                ids.append(platform["id"])
			
 
				+
			
 
				+        print(
			
 
				+            f"配置的监控平台: {[p.get('name', p['id']) for p in self.ctx.platforms]}"
			
 
				+        )
			
 
				+        print(f"开始爬取数据，请求间隔 {self.request_interval} 毫秒")
			
 
				+        Path("output").mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+        results, id_to_name, failed_ids = self.data_fetcher.crawl_websites(
			
 
				+            ids, self.request_interval
			
 
				+        )
			
 
				+
			
 
				+        # 转换为 NewsData 格式并保存到存储后端
			
 
				+        crawl_time = self.ctx.format_time()
			
 
				+        crawl_date = self.ctx.format_date()
			
 
				+        news_data = convert_crawl_results_to_news_data(
			
 
				+            results, id_to_name, failed_ids, crawl_time, crawl_date
			
 
				+        )
			
 
				+
			
 
				+        # 保存到存储后端（SQLite）
			
 
				+        if self.storage_manager.save_news_data(news_data):
			
 
				+            print(f"数据已保存到存储后端: {self.storage_manager.backend_name}")
			
 
				+
			
 
				+        # 保存 TXT 快照（如果启用）
			
 
				+        txt_file = self.storage_manager.save_txt_snapshot(news_data)
			
 
				+        if txt_file:
			
 
				+            print(f"TXT 快照已保存: {txt_file}")
			
 
				+
			
 
				+        # 兼容：同时保存到原有 TXT 格式（确保向后兼容）
			
 
				+        if self.ctx.config["STORAGE"]["FORMATS"]["TXT"]:
			
 
				+            title_file = self.ctx.save_titles(results, id_to_name, failed_ids)
			
 
				+            print(f"标题已保存到: {title_file}")
			
 
				+
			
 
				+        return results, id_to_name, failed_ids
			
 
				+
			
 
				+    def _execute_mode_strategy(
			
 
				+        self, mode_strategy: Dict, results: Dict, id_to_name: Dict, failed_ids: List
			
 
				+    ) -> Optional[str]:
			
 
				+        """执行模式特定逻辑"""
			
 
				+        # 获取当前监控平台ID列表
			
 
				+        current_platform_ids = self.ctx.platform_ids
			
 
				+
			
 
				+        new_titles = self.ctx.detect_new_titles(current_platform_ids)
			
 
				+        time_info = self.ctx.format_time()
			
 
				+        if self.ctx.config["STORAGE"]["FORMATS"]["TXT"]:
			
 
				+            self.ctx.save_titles(results, id_to_name, failed_ids)
			
 
				+        word_groups, filter_words, global_filters = self.ctx.load_frequency_words()
			
 
				+
			
 
				+        # current模式下，实时推送需要使用完整的历史数据来保证统计信息的完整性
			
 
				+        if self.report_mode == "current":
			
 
				+            # 加载完整的历史数据（已按当前平台过滤）
			
 
				+            analysis_data = self._load_analysis_data()
			
 
				+            if analysis_data:
			
 
				+                (
			
 
				+                    all_results,
			
 
				+                    historical_id_to_name,
			
 
				+                    historical_title_info,
			
 
				+                    historical_new_titles,
			
 
				+                    _,
			
 
				+                    _,
			
 
				+                    _,
			
 
				+                ) = analysis_data
			
 
				+
			
 
				+                print(
			
 
				+                    f"current模式：使用过滤后的历史数据，包含平台：{list(all_results.keys())}"
			
 
				+                )
			
 
				+
			
 
				+                stats, html_file = self._run_analysis_pipeline(
			
 
				+                    all_results,
			
 
				+                    self.report_mode,
			
 
				+                    historical_title_info,
			
 
				+                    historical_new_titles,
			
 
				+                    word_groups,
			
 
				+                    filter_words,
			
 
				+                    historical_id_to_name,
			
 
				+                    failed_ids=failed_ids,
			
 
				+                    global_filters=global_filters,
			
 
				+                )
			
 
				+
			
 
				+                combined_id_to_name = {**historical_id_to_name, **id_to_name}
			
 
				+
			
 
				+                if html_file:
			
 
				+                    print(f"HTML报告已生成: {html_file}")
			
 
				+
			
 
				+                # 发送实时通知（使用完整历史数据的统计结果）
			
 
				+                summary_html = None
			
 
				+                if mode_strategy["should_send_realtime"]:
			
 
				+                    self._send_notification_if_needed(
			
 
				+                        stats,
			
 
				+                        mode_strategy["realtime_report_type"],
			
 
				+                        self.report_mode,
			
 
				+                        failed_ids=failed_ids,
			
 
				+                        new_titles=historical_new_titles,
			
 
				+                        id_to_name=combined_id_to_name,
			
 
				+                        html_file_path=html_file,
			
 
				+                    )
			
 
				+            else:
			
 
				+                print("❌ 严重错误：无法读取刚保存的数据文件")
			
 
				+                raise RuntimeError("数据一致性检查失败：保存后立即读取失败")
			
 
				+        else:
			
 
				+            title_info = self._prepare_current_title_info(results, time_info)
			
 
				+            stats, html_file = self._run_analysis_pipeline(
			
 
				+                results,
			
 
				+                self.report_mode,
			
 
				+                title_info,
			
 
				+                new_titles,
			
 
				+                word_groups,
			
 
				+                filter_words,
			
 
				+                id_to_name,
			
 
				+                failed_ids=failed_ids,
			
 
				+                global_filters=global_filters,
			
 
				+            )
			
 
				+            if html_file:
			
 
				+                print(f"HTML报告已生成: {html_file}")
			
 
				+
			
 
				+            # 发送实时通知（如果需要）
			
 
				+            summary_html = None
			
 
				+            if mode_strategy["should_send_realtime"]:
			
 
				+                self._send_notification_if_needed(
			
 
				+                    stats,
			
 
				+                    mode_strategy["realtime_report_type"],
			
 
				+                    self.report_mode,
			
 
				+                    failed_ids=failed_ids,
			
 
				+                    new_titles=new_titles,
			
 
				+                    id_to_name=id_to_name,
			
 
				+                    html_file_path=html_file,
			
 
				+                )
			
 
				+
			
 
				+        # 生成汇总报告（如果需要）
			
 
				+        summary_html = None
			
 
				+        if mode_strategy["should_generate_summary"]:
			
 
				+            if mode_strategy["should_send_realtime"]:
			
 
				+                # 如果已经发送了实时通知，汇总只生成HTML不发送通知
			
 
				+                summary_html = self._generate_summary_html(
			
 
				+                    mode_strategy["summary_mode"]
			
 
				+                )
			
 
				+            else:
			
 
				+                # daily模式：直接生成汇总报告并发送通知
			
 
				+                summary_html = self._generate_summary_report(mode_strategy)
			
 
				+
			
 
				+        # 打开浏览器（仅在非容器环境）
			
 
				+        if self._should_open_browser() and html_file:
			
 
				+            if summary_html:
			
 
				+                summary_url = "file://" + str(Path(summary_html).resolve())
			
 
				+                print(f"正在打开汇总报告: {summary_url}")
			
 
				+                webbrowser.open(summary_url)
			
 
				+            else:
			
 
				+                file_url = "file://" + str(Path(html_file).resolve())
			
 
				+                print(f"正在打开HTML报告: {file_url}")
			
 
				+                webbrowser.open(file_url)
			
 
				+        elif self.is_docker_container and html_file:
			
 
				+            if summary_html:
			
 
				+                print(f"汇总报告已生成（Docker环境）: {summary_html}")
			
 
				+            else:
			
 
				+                print(f"HTML报告已生成（Docker环境）: {html_file}")
			
 
				+
			
 
				+        return summary_html
			
 
				+
			
 
				+    def run(self) -> None:
			
 
				+        """执行分析流程"""
			
 
				+        try:
			
 
				+            self._initialize_and_check_config()
			
 
				+
			
 
				+            mode_strategy = self._get_mode_strategy()
			
 
				+
			
 
				+            results, id_to_name, failed_ids = self._crawl_data()
			
 
				+
			
 
				+            self._execute_mode_strategy(mode_strategy, results, id_to_name, failed_ids)
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"分析流程执行出错: {e}")
			
 
				+            raise
			
 
				+        finally:
			
 
				+            # 清理资源（包括过期数据清理和数据库连接关闭）
			
 
				+            self.ctx.cleanup()
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    """主程序入口"""
			
 
				+    try:
			
 
				+        analyzer = NewsAnalyzer()
			
 
				+        analyzer.run()
			
 
				+    except FileNotFoundError as e:
			
 
				+        print(f"❌ 配置文件错误: {e}")
			
 
				+        print("\n请确保以下文件存在:")
			
 
				+        print("  • config/config.yaml")
			
 
				+        print("  • config/frequency_words.txt")
			
 
				+        print("\n参考项目文档进行正确配置")
			
 
				+    except Exception as e:
			
 
				+        print(f"❌ 程序运行错误: {e}")
			
 
				+        raise
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/trendradar/context.py
+++ b/trendradar/context.py
@@ -0,0 +1,388 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+应用上下文模块
			
 
				+
			
 
				+提供配置上下文类，封装所有依赖配置的操作，消除全局状态和包装函数。
			
 
				+"""
			
 
				+
			
 
				+from datetime import datetime
			
 
				+from pathlib import Path
			
 
				+from typing import Any, Callable, Dict, List, Optional, Tuple
			
 
				+
			
 
				+from trendradar.utils.time import (
			
 
				+    get_configured_time,
			
 
				+    format_date_folder,
			
 
				+    format_time_filename,
			
 
				+    get_current_time_display,
			
 
				+    convert_time_for_display,
			
 
				+)
			
 
				+from trendradar.core import (
			
 
				+    load_frequency_words,
			
 
				+    matches_word_groups,
			
 
				+    save_titles_to_file,
			
 
				+    read_all_today_titles,
			
 
				+    detect_latest_new_titles,
			
 
				+    is_first_crawl_today,
			
 
				+    count_word_frequency,
			
 
				+)
			
 
				+from trendradar.report import (
			
 
				+    clean_title,
			
 
				+    prepare_report_data,
			
 
				+    generate_html_report,
			
 
				+    render_html_content,
			
 
				+)
			
 
				+from trendradar.notification import (
			
 
				+    render_feishu_content,
			
 
				+    render_dingtalk_content,
			
 
				+    split_content_into_batches,
			
 
				+    NotificationDispatcher,
			
 
				+    PushRecordManager,
			
 
				+)
			
 
				+from trendradar.storage import get_storage_manager
			
 
				+
			
 
				+
			
 
				+class AppContext:
			
 
				+    """
			
 
				+    应用上下文类
			
 
				+
			
 
				+    封装所有依赖配置的操作，提供统一的接口。
			
 
				+    消除对全局 CONFIG 的依赖，提高可测试性。
			
 
				+
			
 
				+    使用示例:
			
 
				+        config = load_config()
			
 
				+        ctx = AppContext(config)
			
 
				+
			
 
				+        # 时间操作
			
 
				+        now = ctx.get_time()
			
 
				+        date_folder = ctx.format_date()
			
 
				+
			
 
				+        # 存储操作
			
 
				+        storage = ctx.get_storage_manager()
			
 
				+
			
 
				+        # 报告生成
			
 
				+        html = ctx.generate_html_report(stats, total_titles, ...)
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, config: Dict[str, Any]):
			
 
				+        """
			
 
				+        初始化应用上下文
			
 
				+
			
 
				+        Args:
			
 
				+            config: 完整的配置字典
			
 
				+        """
			
 
				+        self.config = config
			
 
				+        self._storage_manager = None
			
 
				+
			
 
				+    # === 配置访问 ===
			
 
				+
			
 
				+    @property
			
 
				+    def timezone(self) -> str:
			
 
				+        """获取配置的时区"""
			
 
				+        return self.config.get("TIMEZONE", "Asia/Shanghai")
			
 
				+
			
 
				+    @property
			
 
				+    def rank_threshold(self) -> int:
			
 
				+        """获取排名阈值"""
			
 
				+        return self.config.get("RANK_THRESHOLD", 50)
			
 
				+
			
 
				+    @property
			
 
				+    def weight_config(self) -> Dict:
			
 
				+        """获取权重配置"""
			
 
				+        return self.config.get("WEIGHT_CONFIG", {})
			
 
				+
			
 
				+    @property
			
 
				+    def platforms(self) -> List[Dict]:
			
 
				+        """获取平台配置列表"""
			
 
				+        return self.config.get("PLATFORMS", [])
			
 
				+
			
 
				+    @property
			
 
				+    def platform_ids(self) -> List[str]:
			
 
				+        """获取平台ID列表"""
			
 
				+        return [p["id"] for p in self.platforms]
			
 
				+
			
 
				+    # === 时间操作 ===
			
 
				+
			
 
				+    def get_time(self) -> datetime:
			
 
				+        """获取当前配置时区的时间"""
			
 
				+        return get_configured_time(self.timezone)
			
 
				+
			
 
				+    def format_date(self) -> str:
			
 
				+        """格式化日期文件夹 (YYYY-MM-DD)"""
			
 
				+        return format_date_folder(timezone=self.timezone)
			
 
				+
			
 
				+    def format_time(self) -> str:
			
 
				+        """格式化时间文件名 (HH-MM)"""
			
 
				+        return format_time_filename(self.timezone)
			
 
				+
			
 
				+    def get_time_display(self) -> str:
			
 
				+        """获取时间显示 (HH:MM)"""
			
 
				+        return get_current_time_display(self.timezone)
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def convert_time_display(time_str: str) -> str:
			
 
				+        """将 HH-MM 转换为 HH:MM"""
			
 
				+        return convert_time_for_display(time_str)
			
 
				+
			
 
				+    # === 存储操作 ===
			
 
				+
			
 
				+    def get_storage_manager(self):
			
 
				+        """获取存储管理器（延迟初始化，单例）"""
			
 
				+        if self._storage_manager is None:
			
 
				+            storage_config = self.config.get("STORAGE", {})
			
 
				+            remote_config = storage_config.get("REMOTE", {})
			
 
				+            local_config = storage_config.get("LOCAL", {})
			
 
				+            pull_config = storage_config.get("PULL", {})
			
 
				+
			
 
				+            self._storage_manager = get_storage_manager(
			
 
				+                backend_type=storage_config.get("BACKEND", "auto"),
			
 
				+                data_dir=local_config.get("DATA_DIR", "output"),
			
 
				+                enable_txt=storage_config.get("FORMATS", {}).get("TXT", True),
			
 
				+                enable_html=storage_config.get("FORMATS", {}).get("HTML", True),
			
 
				+                remote_config={
			
 
				+                    "bucket_name": remote_config.get("BUCKET_NAME", ""),
			
 
				+                    "access_key_id": remote_config.get("ACCESS_KEY_ID", ""),
			
 
				+                    "secret_access_key": remote_config.get("SECRET_ACCESS_KEY", ""),
			
 
				+                    "endpoint_url": remote_config.get("ENDPOINT_URL", ""),
			
 
				+                    "region": remote_config.get("REGION", ""),
			
 
				+                },
			
 
				+                local_retention_days=local_config.get("RETENTION_DAYS", 0),
			
 
				+                remote_retention_days=remote_config.get("RETENTION_DAYS", 0),
			
 
				+                pull_enabled=pull_config.get("ENABLED", False),
			
 
				+                pull_days=pull_config.get("DAYS", 7),
			
 
				+                timezone=self.timezone,
			
 
				+            )
			
 
				+        return self._storage_manager
			
 
				+
			
 
				+    def get_output_path(self, subfolder: str, filename: str) -> str:
			
 
				+        """获取输出路径"""
			
 
				+        output_dir = Path("output") / self.format_date() / subfolder
			
 
				+        output_dir.mkdir(parents=True, exist_ok=True)
			
 
				+        return str(output_dir / filename)
			
 
				+
			
 
				+    # === 数据处理 ===
			
 
				+
			
 
				+    def save_titles(self, results: Dict, id_to_name: Dict, failed_ids: List) -> str:
			
 
				+        """保存标题到文件"""
			
 
				+        output_path = self.get_output_path("txt", f"{self.format_time()}.txt")
			
 
				+        return save_titles_to_file(results, id_to_name, failed_ids, output_path, clean_title)
			
 
				+
			
 
				+    def read_today_titles(
			
 
				+        self, platform_ids: Optional[List[str]] = None
			
 
				+    ) -> Tuple[Dict, Dict, Dict]:
			
 
				+        """读取当天所有标题"""
			
 
				+        return read_all_today_titles(self.get_storage_manager(), platform_ids)
			
 
				+
			
 
				+    def detect_new_titles(
			
 
				+        self, platform_ids: Optional[List[str]] = None
			
 
				+    ) -> Dict:
			
 
				+        """检测最新批次的新增标题"""
			
 
				+        return detect_latest_new_titles(self.get_storage_manager(), platform_ids)
			
 
				+
			
 
				+    def is_first_crawl(self) -> bool:
			
 
				+        """检测是否是当天第一次爬取"""
			
 
				+        return is_first_crawl_today("output", self.format_date())
			
 
				+
			
 
				+    # === 频率词处理 ===
			
 
				+
			
 
				+    def load_frequency_words(
			
 
				+        self, frequency_file: Optional[str] = None
			
 
				+    ) -> Tuple[List[Dict], List[str], List[str]]:
			
 
				+        """加载频率词配置"""
			
 
				+        return load_frequency_words(frequency_file)
			
 
				+
			
 
				+    def matches_word_groups(
			
 
				+        self,
			
 
				+        title: str,
			
 
				+        word_groups: List[Dict],
			
 
				+        filter_words: List[str],
			
 
				+        global_filters: Optional[List[str]] = None,
			
 
				+    ) -> bool:
			
 
				+        """检查标题是否匹配词组规则"""
			
 
				+        return matches_word_groups(title, word_groups, filter_words, global_filters)
			
 
				+
			
 
				+    # === 统计分析 ===
			
 
				+
			
 
				+    def count_frequency(
			
 
				+        self,
			
 
				+        results: Dict,
			
 
				+        word_groups: List[Dict],
			
 
				+        filter_words: List[str],
			
 
				+        id_to_name: Dict,
			
 
				+        title_info: Optional[Dict] = None,
			
 
				+        new_titles: Optional[Dict] = None,
			
 
				+        mode: str = "daily",
			
 
				+        global_filters: Optional[List[str]] = None,
			
 
				+    ) -> Tuple[List[Dict], int]:
			
 
				+        """统计词频"""
			
 
				+        return count_word_frequency(
			
 
				+            results=results,
			
 
				+            word_groups=word_groups,
			
 
				+            filter_words=filter_words,
			
 
				+            id_to_name=id_to_name,
			
 
				+            title_info=title_info,
			
 
				+            rank_threshold=self.rank_threshold,
			
 
				+            new_titles=new_titles,
			
 
				+            mode=mode,
			
 
				+            global_filters=global_filters,
			
 
				+            weight_config=self.weight_config,
			
 
				+            max_news_per_keyword=self.config.get("MAX_NEWS_PER_KEYWORD", 0),
			
 
				+            sort_by_position_first=self.config.get("SORT_BY_POSITION_FIRST", False),
			
 
				+            is_first_crawl_func=self.is_first_crawl,
			
 
				+            convert_time_func=self.convert_time_display,
			
 
				+        )
			
 
				+
			
 
				+    # === 报告生成 ===
			
 
				+
			
 
				+    def prepare_report(
			
 
				+        self,
			
 
				+        stats: List[Dict],
			
 
				+        failed_ids: Optional[List] = None,
			
 
				+        new_titles: Optional[Dict] = None,
			
 
				+        id_to_name: Optional[Dict] = None,
			
 
				+        mode: str = "daily",
			
 
				+    ) -> Dict:
			
 
				+        """准备报告数据"""
			
 
				+        return prepare_report_data(
			
 
				+            stats=stats,
			
 
				+            failed_ids=failed_ids,
			
 
				+            new_titles=new_titles,
			
 
				+            id_to_name=id_to_name,
			
 
				+            mode=mode,
			
 
				+            rank_threshold=self.rank_threshold,
			
 
				+            matches_word_groups_func=self.matches_word_groups,
			
 
				+            load_frequency_words_func=self.load_frequency_words,
			
 
				+        )
			
 
				+
			
 
				+    def generate_html(
			
 
				+        self,
			
 
				+        stats: List[Dict],
			
 
				+        total_titles: int,
			
 
				+        failed_ids: Optional[List] = None,
			
 
				+        new_titles: Optional[Dict] = None,
			
 
				+        id_to_name: Optional[Dict] = None,
			
 
				+        mode: str = "daily",
			
 
				+        is_daily_summary: bool = False,
			
 
				+        update_info: Optional[Dict] = None,
			
 
				+    ) -> str:
			
 
				+        """生成HTML报告"""
			
 
				+        return generate_html_report(
			
 
				+            stats=stats,
			
 
				+            total_titles=total_titles,
			
 
				+            failed_ids=failed_ids,
			
 
				+            new_titles=new_titles,
			
 
				+            id_to_name=id_to_name,
			
 
				+            mode=mode,
			
 
				+            is_daily_summary=is_daily_summary,
			
 
				+            update_info=update_info,
			
 
				+            rank_threshold=self.rank_threshold,
			
 
				+            output_dir="output",
			
 
				+            date_folder=self.format_date(),
			
 
				+            time_filename=self.format_time(),
			
 
				+            render_html_func=lambda *args, **kwargs: self.render_html(*args, **kwargs),
			
 
				+            matches_word_groups_func=self.matches_word_groups,
			
 
				+            load_frequency_words_func=self.load_frequency_words,
			
 
				+            enable_index_copy=True,
			
 
				+        )
			
 
				+
			
 
				+    def render_html(
			
 
				+        self,
			
 
				+        report_data: Dict,
			
 
				+        total_titles: int,
			
 
				+        is_daily_summary: bool = False,
			
 
				+        mode: str = "daily",
			
 
				+        update_info: Optional[Dict] = None,
			
 
				+    ) -> str:
			
 
				+        """渲染HTML内容"""
			
 
				+        return render_html_content(
			
 
				+            report_data=report_data,
			
 
				+            total_titles=total_titles,
			
 
				+            is_daily_summary=is_daily_summary,
			
 
				+            mode=mode,
			
 
				+            update_info=update_info,
			
 
				+            reverse_content_order=self.config.get("REVERSE_CONTENT_ORDER", False),
			
 
				+            get_time_func=self.get_time,
			
 
				+        )
			
 
				+
			
 
				+    # === 通知内容渲染 ===
			
 
				+
			
 
				+    def render_feishu(
			
 
				+        self,
			
 
				+        report_data: Dict,
			
 
				+        update_info: Optional[Dict] = None,
			
 
				+        mode: str = "daily",
			
 
				+    ) -> str:
			
 
				+        """渲染飞书内容"""
			
 
				+        return render_feishu_content(
			
 
				+            report_data=report_data,
			
 
				+            update_info=update_info,
			
 
				+            mode=mode,
			
 
				+            separator=self.config.get("FEISHU_MESSAGE_SEPARATOR", "---"),
			
 
				+            reverse_content_order=self.config.get("REVERSE_CONTENT_ORDER", False),
			
 
				+            get_time_func=self.get_time,
			
 
				+        )
			
 
				+
			
 
				+    def render_dingtalk(
			
 
				+        self,
			
 
				+        report_data: Dict,
			
 
				+        update_info: Optional[Dict] = None,
			
 
				+        mode: str = "daily",
			
 
				+    ) -> str:
			
 
				+        """渲染钉钉内容"""
			
 
				+        return render_dingtalk_content(
			
 
				+            report_data=report_data,
			
 
				+            update_info=update_info,
			
 
				+            mode=mode,
			
 
				+            reverse_content_order=self.config.get("REVERSE_CONTENT_ORDER", False),
			
 
				+            get_time_func=self.get_time,
			
 
				+        )
			
 
				+
			
 
				+    def split_content(
			
 
				+        self,
			
 
				+        report_data: Dict,
			
 
				+        format_type: str,
			
 
				+        update_info: Optional[Dict] = None,
			
 
				+        max_bytes: Optional[int] = None,
			
 
				+        mode: str = "daily",
			
 
				+    ) -> List[str]:
			
 
				+        """分批处理消息内容"""
			
 
				+        return split_content_into_batches(
			
 
				+            report_data=report_data,
			
 
				+            format_type=format_type,
			
 
				+            update_info=update_info,
			
 
				+            max_bytes=max_bytes,
			
 
				+            mode=mode,
			
 
				+            batch_sizes={
			
 
				+                "dingtalk": self.config.get("DINGTALK_BATCH_SIZE", 20000),
			
 
				+                "feishu": self.config.get("FEISHU_BATCH_SIZE", 29000),
			
 
				+                "default": self.config.get("MESSAGE_BATCH_SIZE", 4000),
			
 
				+            },
			
 
				+            feishu_separator=self.config.get("FEISHU_MESSAGE_SEPARATOR", "---"),
			
 
				+            reverse_content_order=self.config.get("REVERSE_CONTENT_ORDER", False),
			
 
				+            get_time_func=self.get_time,
			
 
				+        )
			
 
				+
			
 
				+    # === 通知发送 ===
			
 
				+
			
 
				+    def create_notification_dispatcher(self) -> NotificationDispatcher:
			
 
				+        """创建通知调度器"""
			
 
				+        return NotificationDispatcher(
			
 
				+            config=self.config,
			
 
				+            get_time_func=self.get_time,
			
 
				+            split_content_func=self.split_content,
			
 
				+        )
			
 
				+
			
 
				+    def create_push_manager(self) -> PushRecordManager:
			
 
				+        """创建推送记录管理器"""
			
 
				+        return PushRecordManager(
			
 
				+            storage_backend=self.get_storage_manager(),
			
 
				+            get_time_func=self.get_time,
			
 
				+        )
			
 
				+
			
 
				+    # === 资源清理 ===
			
 
				+
			
 
				+    def cleanup(self):
			
 
				+        """清理资源"""
			
 
				+        if self._storage_manager:
			
 
				+            self._storage_manager.cleanup_old_data()
			
 
				+            self._storage_manager.cleanup()
			
 
				+            self._storage_manager = None
			
--- a/trendradar/core/__init__.py
+++ b/trendradar/core/__init__.py
@@ -0,0 +1,47 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+核心模块 - 配置管理和核心工具
			
 
				+"""
			
 
				+
			
 
				+from trendradar.core.config import (
			
 
				+    parse_multi_account_config,
			
 
				+    validate_paired_configs,
			
 
				+    limit_accounts,
			
 
				+    get_account_at_index,
			
 
				+)
			
 
				+from trendradar.core.loader import load_config
			
 
				+from trendradar.core.frequency import load_frequency_words, matches_word_groups
			
 
				+from trendradar.core.data import (
			
 
				+    save_titles_to_file,
			
 
				+    read_all_today_titles_from_storage,
			
 
				+    read_all_today_titles,
			
 
				+    detect_latest_new_titles_from_storage,
			
 
				+    detect_latest_new_titles,
			
 
				+    is_first_crawl_today,
			
 
				+)
			
 
				+from trendradar.core.analyzer import (
			
 
				+    calculate_news_weight,
			
 
				+    format_time_display,
			
 
				+    count_word_frequency,
			
 
				+)
			
 
				+
			
 
				+__all__ = [
			
 
				+    "parse_multi_account_config",
			
 
				+    "validate_paired_configs",
			
 
				+    "limit_accounts",
			
 
				+    "get_account_at_index",
			
 
				+    "load_config",
			
 
				+    "load_frequency_words",
			
 
				+    "matches_word_groups",
			
 
				+    # 数据处理
			
 
				+    "save_titles_to_file",
			
 
				+    "read_all_today_titles_from_storage",
			
 
				+    "read_all_today_titles",
			
 
				+    "detect_latest_new_titles_from_storage",
			
 
				+    "detect_latest_new_titles",
			
 
				+    "is_first_crawl_today",
			
 
				+    # 统计分析
			
 
				+    "calculate_news_weight",
			
 
				+    "format_time_display",
			
 
				+    "count_word_frequency",
			
 
				+]
			
--- a/trendradar/core/analyzer.py
+++ b/trendradar/core/analyzer.py
@@ -0,0 +1,469 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+统计分析模块
			
 
				+
			
 
				+提供新闻统计和分析功能：
			
 
				+- calculate_news_weight: 计算新闻权重
			
 
				+- format_time_display: 格式化时间显示
			
 
				+- count_word_frequency: 统计词频
			
 
				+"""
			
 
				+
			
 
				+from typing import Dict, List, Tuple, Optional, Callable
			
 
				+
			
 
				+from trendradar.core.frequency import matches_word_groups
			
 
				+
			
 
				+
			
 
				+def calculate_news_weight(
			
 
				+    title_data: Dict,
			
 
				+    rank_threshold: int,
			
 
				+    weight_config: Dict,
			
 
				+) -> float:
			
 
				+    """
			
 
				+    计算新闻权重，用于排序
			
 
				+
			
 
				+    Args:
			
 
				+        title_data: 标题数据，包含 ranks 和 count
			
 
				+        rank_threshold: 排名阈值
			
 
				+        weight_config: 权重配置 {RANK_WEIGHT, FREQUENCY_WEIGHT, HOTNESS_WEIGHT}
			
 
				+
			
 
				+    Returns:
			
 
				+        float: 计算出的权重值
			
 
				+    """
			
 
				+    ranks = title_data.get("ranks", [])
			
 
				+    if not ranks:
			
 
				+        return 0.0
			
 
				+
			
 
				+    count = title_data.get("count", len(ranks))
			
 
				+
			
 
				+    # 排名权重：Σ(11 - min(rank, 10)) / 出现次数
			
 
				+    rank_scores = []
			
 
				+    for rank in ranks:
			
 
				+        score = 11 - min(rank, 10)
			
 
				+        rank_scores.append(score)
			
 
				+
			
 
				+    rank_weight = sum(rank_scores) / len(ranks) if ranks else 0
			
 
				+
			
 
				+    # 频次权重：min(出现次数, 10) × 10
			
 
				+    frequency_weight = min(count, 10) * 10
			
 
				+
			
 
				+    # 热度加成：高排名次数 / 总出现次数 × 100
			
 
				+    high_rank_count = sum(1 for rank in ranks if rank <= rank_threshold)
			
 
				+    hotness_ratio = high_rank_count / len(ranks) if ranks else 0
			
 
				+    hotness_weight = hotness_ratio * 100
			
 
				+
			
 
				+    total_weight = (
			
 
				+        rank_weight * weight_config["RANK_WEIGHT"]
			
 
				+        + frequency_weight * weight_config["FREQUENCY_WEIGHT"]
			
 
				+        + hotness_weight * weight_config["HOTNESS_WEIGHT"]
			
 
				+    )
			
 
				+
			
 
				+    return total_weight
			
 
				+
			
 
				+
			
 
				+def format_time_display(
			
 
				+    first_time: str,
			
 
				+    last_time: str,
			
 
				+    convert_time_func: Callable[[str], str],
			
 
				+) -> str:
			
 
				+    """
			
 
				+    格式化时间显示（将 HH-MM 转换为 HH:MM）
			
 
				+
			
 
				+    Args:
			
 
				+        first_time: 首次出现时间
			
 
				+        last_time: 最后出现时间
			
 
				+        convert_time_func: 时间格式转换函数
			
 
				+
			
 
				+    Returns:
			
 
				+        str: 格式化后的时间显示字符串
			
 
				+    """
			
 
				+    if not first_time:
			
 
				+        return ""
			
 
				+    # 转换为显示格式
			
 
				+    first_display = convert_time_func(first_time)
			
 
				+    last_display = convert_time_func(last_time)
			
 
				+    if first_display == last_display or not last_display:
			
 
				+        return first_display
			
 
				+    else:
			
 
				+        return f"[{first_display} ~ {last_display}]"
			
 
				+
			
 
				+
			
 
				+def count_word_frequency(
			
 
				+    results: Dict,
			
 
				+    word_groups: List[Dict],
			
 
				+    filter_words: List[str],
			
 
				+    id_to_name: Dict,
			
 
				+    title_info: Optional[Dict] = None,
			
 
				+    rank_threshold: int = 3,
			
 
				+    new_titles: Optional[Dict] = None,
			
 
				+    mode: str = "daily",
			
 
				+    global_filters: Optional[List[str]] = None,
			
 
				+    weight_config: Optional[Dict] = None,
			
 
				+    max_news_per_keyword: int = 0,
			
 
				+    sort_by_position_first: bool = False,
			
 
				+    is_first_crawl_func: Optional[Callable[[], bool]] = None,
			
 
				+    convert_time_func: Optional[Callable[[str], str]] = None,
			
 
				+) -> Tuple[List[Dict], int]:
			
 
				+    """
			
 
				+    统计词频，支持必须词、频率词、过滤词、全局过滤词，并标记新增标题
			
 
				+
			
 
				+    Args:
			
 
				+        results: 抓取结果 {source_id: {title: title_data}}
			
 
				+        word_groups: 词组配置列表
			
 
				+        filter_words: 过滤词列表
			
 
				+        id_to_name: ID 到名称的映射
			
 
				+        title_info: 标题统计信息（可选）
			
 
				+        rank_threshold: 排名阈值
			
 
				+        new_titles: 新增标题（可选）
			
 
				+        mode: 报告模式 (daily/incremental/current)
			
 
				+        global_filters: 全局过滤词（可选）
			
 
				+        weight_config: 权重配置
			
 
				+        max_news_per_keyword: 每个关键词最大显示数量
			
 
				+        sort_by_position_first: 是否优先按配置位置排序
			
 
				+        is_first_crawl_func: 检测是否是当天第一次爬取的函数
			
 
				+        convert_time_func: 时间格式转换函数
			
 
				+
			
 
				+    Returns:
			
 
				+        Tuple[List[Dict], int]: (统计结果列表, 总标题数)
			
 
				+    """
			
 
				+    # 默认权重配置
			
 
				+    if weight_config is None:
			
 
				+        weight_config = {
			
 
				+            "RANK_WEIGHT": 0.4,
			
 
				+            "FREQUENCY_WEIGHT": 0.3,
			
 
				+            "HOTNESS_WEIGHT": 0.3,
			
 
				+        }
			
 
				+
			
 
				+    # 默认时间转换函数
			
 
				+    if convert_time_func is None:
			
 
				+        convert_time_func = lambda x: x
			
 
				+
			
 
				+    # 默认首次爬取检测函数
			
 
				+    if is_first_crawl_func is None:
			
 
				+        is_first_crawl_func = lambda: True
			
 
				+
			
 
				+    # 如果没有配置词组，创建一个包含所有新闻的虚拟词组
			
 
				+    if not word_groups:
			
 
				+        print("频率词配置为空，将显示所有新闻")
			
 
				+        word_groups = [{"required": [], "normal": [], "group_key": "全部新闻"}]
			
 
				+        filter_words = []  # 清空过滤词，显示所有新闻
			
 
				+
			
 
				+    is_first_today = is_first_crawl_func()
			
 
				+
			
 
				+    # 确定处理的数据源和新增标记逻辑
			
 
				+    if mode == "incremental":
			
 
				+        if is_first_today:
			
 
				+            # 增量模式 + 当天第一次：处理所有新闻，都标记为新增
			
 
				+            results_to_process = results
			
 
				+            all_news_are_new = True
			
 
				+        else:
			
 
				+            # 增量模式 + 当天非第一次：只处理新增的新闻
			
 
				+            results_to_process = new_titles if new_titles else {}
			
 
				+            all_news_are_new = True
			
 
				+    elif mode == "current":
			
 
				+        # current 模式：只处理当前时间批次的新闻，但统计信息来自全部历史
			
 
				+        if title_info:
			
 
				+            latest_time = None
			
 
				+            for source_titles in title_info.values():
			
 
				+                for title_data in source_titles.values():
			
 
				+                    last_time = title_data.get("last_time", "")
			
 
				+                    if last_time:
			
 
				+                        if latest_time is None or last_time > latest_time:
			
 
				+                            latest_time = last_time
			
 
				+
			
 
				+            # 只处理 last_time 等于最新时间的新闻
			
 
				+            if latest_time:
			
 
				+                results_to_process = {}
			
 
				+                for source_id, source_titles in results.items():
			
 
				+                    if source_id in title_info:
			
 
				+                        filtered_titles = {}
			
 
				+                        for title, title_data in source_titles.items():
			
 
				+                            if title in title_info[source_id]:
			
 
				+                                info = title_info[source_id][title]
			
 
				+                                if info.get("last_time") == latest_time:
			
 
				+                                    filtered_titles[title] = title_data
			
 
				+                        if filtered_titles:
			
 
				+                            results_to_process[source_id] = filtered_titles
			
 
				+
			
 
				+                print(
			
 
				+                    f"当前榜单模式：最新时间 {latest_time}，筛选出 {sum(len(titles) for titles in results_to_process.values())} 条当前榜单新闻"
			
 
				+                )
			
 
				+            else:
			
 
				+                results_to_process = results
			
 
				+        else:
			
 
				+            results_to_process = results
			
 
				+        all_news_are_new = False
			
 
				+    else:
			
 
				+        # 当日汇总模式：处理所有新闻
			
 
				+        results_to_process = results
			
 
				+        all_news_are_new = False
			
 
				+        total_input_news = sum(len(titles) for titles in results.values())
			
 
				+        filter_status = (
			
 
				+            "全部显示"
			
 
				+            if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻"
			
 
				+            else "频率词过滤"
			
 
				+        )
			
 
				+        print(f"当日汇总模式：处理 {total_input_news} 条新闻，模式：{filter_status}")
			
 
				+
			
 
				+    word_stats = {}
			
 
				+    total_titles = 0
			
 
				+    processed_titles = {}
			
 
				+    matched_new_count = 0
			
 
				+
			
 
				+    if title_info is None:
			
 
				+        title_info = {}
			
 
				+    if new_titles is None:
			
 
				+        new_titles = {}
			
 
				+
			
 
				+    for group in word_groups:
			
 
				+        group_key = group["group_key"]
			
 
				+        word_stats[group_key] = {"count": 0, "titles": {}}
			
 
				+
			
 
				+    for source_id, titles_data in results_to_process.items():
			
 
				+        total_titles += len(titles_data)
			
 
				+
			
 
				+        if source_id not in processed_titles:
			
 
				+            processed_titles[source_id] = {}
			
 
				+
			
 
				+        for title, title_data in titles_data.items():
			
 
				+            if title in processed_titles.get(source_id, {}):
			
 
				+                continue
			
 
				+
			
 
				+            # 使用统一的匹配逻辑
			
 
				+            matches_frequency_words = matches_word_groups(
			
 
				+                title, word_groups, filter_words, global_filters
			
 
				+            )
			
 
				+
			
 
				+            if not matches_frequency_words:
			
 
				+                continue
			
 
				+
			
 
				+            # 如果是增量模式或 current 模式第一次，统计匹配的新增新闻数量
			
 
				+            if (mode == "incremental" and all_news_are_new) or (
			
 
				+                mode == "current" and is_first_today
			
 
				+            ):
			
 
				+                matched_new_count += 1
			
 
				+
			
 
				+            source_ranks = title_data.get("ranks", [])
			
 
				+            source_url = title_data.get("url", "")
			
 
				+            source_mobile_url = title_data.get("mobileUrl", "")
			
 
				+
			
 
				+            # 找到匹配的词组（防御性转换确保类型安全）
			
 
				+            title_lower = str(title).lower() if not isinstance(title, str) else title.lower()
			
 
				+            for group in word_groups:
			
 
				+                required_words = group["required"]
			
 
				+                normal_words = group["normal"]
			
 
				+
			
 
				+                # 如果是"全部新闻"模式，所有标题都匹配第一个（唯一的）词组
			
 
				+                if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻":
			
 
				+                    group_key = group["group_key"]
			
 
				+                    word_stats[group_key]["count"] += 1
			
 
				+                    if source_id not in word_stats[group_key]["titles"]:
			
 
				+                        word_stats[group_key]["titles"][source_id] = []
			
 
				+                else:
			
 
				+                    # 原有的匹配逻辑
			
 
				+                    if required_words:
			
 
				+                        all_required_present = all(
			
 
				+                            req_word.lower() in title_lower
			
 
				+                            for req_word in required_words
			
 
				+                        )
			
 
				+                        if not all_required_present:
			
 
				+                            continue
			
 
				+
			
 
				+                    if normal_words:
			
 
				+                        any_normal_present = any(
			
 
				+                            normal_word.lower() in title_lower
			
 
				+                            for normal_word in normal_words
			
 
				+                        )
			
 
				+                        if not any_normal_present:
			
 
				+                            continue
			
 
				+
			
 
				+                    group_key = group["group_key"]
			
 
				+                    word_stats[group_key]["count"] += 1
			
 
				+                    if source_id not in word_stats[group_key]["titles"]:
			
 
				+                        word_stats[group_key]["titles"][source_id] = []
			
 
				+
			
 
				+                first_time = ""
			
 
				+                last_time = ""
			
 
				+                count_info = 1
			
 
				+                ranks = source_ranks if source_ranks else []
			
 
				+                url = source_url
			
 
				+                mobile_url = source_mobile_url
			
 
				+
			
 
				+                # 对于 current 模式，从历史统计信息中获取完整数据
			
 
				+                if (
			
 
				+                    mode == "current"
			
 
				+                    and title_info
			
 
				+                    and source_id in title_info
			
 
				+                    and title in title_info[source_id]
			
 
				+                ):
			
 
				+                    info = title_info[source_id][title]
			
 
				+                    first_time = info.get("first_time", "")
			
 
				+                    last_time = info.get("last_time", "")
			
 
				+                    count_info = info.get("count", 1)
			
 
				+                    if "ranks" in info and info["ranks"]:
			
 
				+                        ranks = info["ranks"]
			
 
				+                    url = info.get("url", source_url)
			
 
				+                    mobile_url = info.get("mobileUrl", source_mobile_url)
			
 
				+                elif (
			
 
				+                    title_info
			
 
				+                    and source_id in title_info
			
 
				+                    and title in title_info[source_id]
			
 
				+                ):
			
 
				+                    info = title_info[source_id][title]
			
 
				+                    first_time = info.get("first_time", "")
			
 
				+                    last_time = info.get("last_time", "")
			
 
				+                    count_info = info.get("count", 1)
			
 
				+                    if "ranks" in info and info["ranks"]:
			
 
				+                        ranks = info["ranks"]
			
 
				+                    url = info.get("url", source_url)
			
 
				+                    mobile_url = info.get("mobileUrl", source_mobile_url)
			
 
				+
			
 
				+                if not ranks:
			
 
				+                    ranks = [99]
			
 
				+
			
 
				+                time_display = format_time_display(first_time, last_time, convert_time_func)
			
 
				+
			
 
				+                source_name = id_to_name.get(source_id, source_id)
			
 
				+
			
 
				+                # 判断是否为新增
			
 
				+                is_new = False
			
 
				+                if all_news_are_new:
			
 
				+                    # 增量模式下所有处理的新闻都是新增，或者当天第一次的所有新闻都是新增
			
 
				+                    is_new = True
			
 
				+                elif new_titles and source_id in new_titles:
			
 
				+                    # 检查是否在新增列表中
			
 
				+                    new_titles_for_source = new_titles[source_id]
			
 
				+                    is_new = title in new_titles_for_source
			
 
				+
			
 
				+                word_stats[group_key]["titles"][source_id].append(
			
 
				+                    {
			
 
				+                        "title": title,
			
 
				+                        "source_name": source_name,
			
 
				+                        "first_time": first_time,
			
 
				+                        "last_time": last_time,
			
 
				+                        "time_display": time_display,
			
 
				+                        "count": count_info,
			
 
				+                        "ranks": ranks,
			
 
				+                        "rank_threshold": rank_threshold,
			
 
				+                        "url": url,
			
 
				+                        "mobileUrl": mobile_url,
			
 
				+                        "is_new": is_new,
			
 
				+                    }
			
 
				+                )
			
 
				+
			
 
				+                if source_id not in processed_titles:
			
 
				+                    processed_titles[source_id] = {}
			
 
				+                processed_titles[source_id][title] = True
			
 
				+
			
 
				+                break
			
 
				+
			
 
				+    # 最后统一打印汇总信息
			
 
				+    if mode == "incremental":
			
 
				+        if is_first_today:
			
 
				+            total_input_news = sum(len(titles) for titles in results.values())
			
 
				+            filter_status = (
			
 
				+                "全部显示"
			
 
				+                if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻"
			
 
				+                else "频率词匹配"
			
 
				+            )
			
 
				+            print(
			
 
				+                f"增量模式：当天第一次爬取，{total_input_news} 条新闻中有 {matched_new_count} 条{filter_status}"
			
 
				+            )
			
 
				+        else:
			
 
				+            if new_titles:
			
 
				+                total_new_count = sum(len(titles) for titles in new_titles.values())
			
 
				+                filter_status = (
			
 
				+                    "全部显示"
			
 
				+                    if len(word_groups) == 1
			
 
				+                    and word_groups[0]["group_key"] == "全部新闻"
			
 
				+                    else "匹配频率词"
			
 
				+                )
			
 
				+                print(
			
 
				+                    f"增量模式：{total_new_count} 条新增新闻中，有 {matched_new_count} 条{filter_status}"
			
 
				+                )
			
 
				+                if matched_new_count == 0 and len(word_groups) > 1:
			
 
				+                    print("增量模式：没有新增新闻匹配频率词，将不会发送通知")
			
 
				+            else:
			
 
				+                print("增量模式：未检测到新增新闻")
			
 
				+    elif mode == "current":
			
 
				+        total_input_news = sum(len(titles) for titles in results_to_process.values())
			
 
				+        if is_first_today:
			
 
				+            filter_status = (
			
 
				+                "全部显示"
			
 
				+                if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻"
			
 
				+                else "频率词匹配"
			
 
				+            )
			
 
				+            print(
			
 
				+                f"当前榜单模式：当天第一次爬取，{total_input_news} 条当前榜单新闻中有 {matched_new_count} 条{filter_status}"
			
 
				+            )
			
 
				+        else:
			
 
				+            matched_count = sum(stat["count"] for stat in word_stats.values())
			
 
				+            filter_status = (
			
 
				+                "全部显示"
			
 
				+                if len(word_groups) == 1 and word_groups[0]["group_key"] == "全部新闻"
			
 
				+                else "频率词匹配"
			
 
				+            )
			
 
				+            print(
			
 
				+                f"当前榜单模式：{total_input_news} 条当前榜单新闻中有 {matched_count} 条{filter_status}"
			
 
				+            )
			
 
				+
			
 
				+    stats = []
			
 
				+    # 创建 group_key 到位置和最大数量的映射
			
 
				+    group_key_to_position = {
			
 
				+        group["group_key"]: idx for idx, group in enumerate(word_groups)
			
 
				+    }
			
 
				+    group_key_to_max_count = {
			
 
				+        group["group_key"]: group.get("max_count", 0) for group in word_groups
			
 
				+    }
			
 
				+
			
 
				+    for group_key, data in word_stats.items():
			
 
				+        all_titles = []
			
 
				+        for source_id, title_list in data["titles"].items():
			
 
				+            all_titles.extend(title_list)
			
 
				+
			
 
				+        # 按权重排序
			
 
				+        sorted_titles = sorted(
			
 
				+            all_titles,
			
 
				+            key=lambda x: (
			
 
				+                -calculate_news_weight(x, rank_threshold, weight_config),
			
 
				+                min(x["ranks"]) if x["ranks"] else 999,
			
 
				+                -x["count"],
			
 
				+            ),
			
 
				+        )
			
 
				+
			
 
				+        # 应用最大显示数量限制（优先级：单独配置 > 全局配置）
			
 
				+        group_max_count = group_key_to_max_count.get(group_key, 0)
			
 
				+        if group_max_count == 0:
			
 
				+            # 使用全局配置
			
 
				+            group_max_count = max_news_per_keyword
			
 
				+
			
 
				+        if group_max_count > 0:
			
 
				+            sorted_titles = sorted_titles[:group_max_count]
			
 
				+
			
 
				+        stats.append(
			
 
				+            {
			
 
				+                "word": group_key,
			
 
				+                "count": data["count"],
			
 
				+                "position": group_key_to_position.get(group_key, 999),
			
 
				+                "titles": sorted_titles,
			
 
				+                "percentage": (
			
 
				+                    round(data["count"] / total_titles * 100, 2)
			
 
				+                    if total_titles > 0
			
 
				+                    else 0
			
 
				+                ),
			
 
				+            }
			
 
				+        )
			
 
				+
			
 
				+    # 根据配置选择排序优先级
			
 
				+    if sort_by_position_first:
			
 
				+        # 先按配置位置，再按热点条数
			
 
				+        stats.sort(key=lambda x: (x["position"], -x["count"]))
			
 
				+    else:
			
 
				+        # 先按热点条数，再按配置位置（原逻辑）
			
 
				+        stats.sort(key=lambda x: (-x["count"], x["position"]))
			
 
				+
			
 
				+    # 打印过滤后的匹配新闻数（与推送显示一致）
			
 
				+    matched_news_count = sum(len(stat["titles"]) for stat in stats if stat["count"] > 0)
			
 
				+    if mode == "daily":
			
 
				+        print(f"频率词过滤后：{matched_news_count} 条新闻匹配（将显示在推送中）")
			
 
				+
			
 
				+    return stats, total_titles
			
--- a/trendradar/core/config.py
+++ b/trendradar/core/config.py
@@ -0,0 +1,152 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+配置工具模块 - 多账号配置解析和验证
			
 
				+
			
 
				+提供多账号推送配置的解析、验证和限制功能
			
 
				+"""
			
 
				+
			
 
				+from typing import Dict, List, Optional, Tuple
			
 
				+
			
 
				+
			
 
				+def parse_multi_account_config(config_value: str, separator: str = ";") -> List[str]:
			
 
				+    """
			
 
				+    解析多账号配置，返回账号列表
			
 
				+
			
 
				+    Args:
			
 
				+        config_value: 配置值字符串，多个账号用分隔符分隔
			
 
				+        separator: 分隔符，默认为 ;
			
 
				+
			
 
				+    Returns:
			
 
				+        账号列表，空字符串会被保留（用于占位）
			
 
				+
			
 
				+    Examples:
			
 
				+        >>> parse_multi_account_config("url1;url2;url3")
			
 
				+        ['url1', 'url2', 'url3']
			
 
				+        >>> parse_multi_account_config(";token2")  # 第一个账号无token
			
 
				+        ['', 'token2']
			
 
				+        >>> parse_multi_account_config("")
			
 
				+        []
			
 
				+    """
			
 
				+    if not config_value:
			
 
				+        return []
			
 
				+    # 保留空字符串用于占位（如 ";token2" 表示第一个账号无token）
			
 
				+    accounts = [acc.strip() for acc in config_value.split(separator)]
			
 
				+    # 过滤掉全部为空的情况
			
 
				+    if all(not acc for acc in accounts):
			
 
				+        return []
			
 
				+    return accounts
			
 
				+
			
 
				+
			
 
				+def validate_paired_configs(
			
 
				+    configs: Dict[str, List[str]],
			
 
				+    channel_name: str,
			
 
				+    required_keys: Optional[List[str]] = None
			
 
				+) -> Tuple[bool, int]:
			
 
				+    """
			
 
				+    验证配对配置的数量是否一致
			
 
				+
			
 
				+    对于需要多个配置项配对的渠道（如 Telegram 的 token 和 chat_id），
			
 
				+    验证所有配置项的账号数量是否一致。
			
 
				+
			
 
				+    Args:
			
 
				+        configs: 配置字典，key 为配置名，value 为账号列表
			
 
				+        channel_name: 渠道名称，用于日志输出
			
 
				+        required_keys: 必须有值的配置项列表
			
 
				+
			
 
				+    Returns:
			
 
				+        (是否验证通过, 账号数量)
			
 
				+
			
 
				+    Examples:
			
 
				+        >>> validate_paired_configs({
			
 
				+        ...     "token": ["t1", "t2"],
			
 
				+        ...     "chat_id": ["c1", "c2"]
			
 
				+        ... }, "Telegram", ["token", "chat_id"])
			
 
				+        (True, 2)
			
 
				+
			
 
				+        >>> validate_paired_configs({
			
 
				+        ...     "token": ["t1", "t2"],
			
 
				+        ...     "chat_id": ["c1"]  # 数量不匹配
			
 
				+        ... }, "Telegram", ["token", "chat_id"])
			
 
				+        (False, 0)
			
 
				+    """
			
 
				+    # 过滤掉空列表
			
 
				+    non_empty_configs = {k: v for k, v in configs.items() if v}
			
 
				+
			
 
				+    if not non_empty_configs:
			
 
				+        return True, 0
			
 
				+
			
 
				+    # 检查必须项
			
 
				+    if required_keys:
			
 
				+        for key in required_keys:
			
 
				+            if key not in non_empty_configs or not non_empty_configs[key]:
			
 
				+                return True, 0  # 必须项为空，视为未配置
			
 
				+
			
 
				+    # 获取所有非空配置的长度
			
 
				+    lengths = {k: len(v) for k, v in non_empty_configs.items()}
			
 
				+    unique_lengths = set(lengths.values())
			
 
				+
			
 
				+    if len(unique_lengths) > 1:
			
 
				+        print(f"❌ {channel_name} 配置错误：配对配置数量不一致，将跳过该渠道推送")
			
 
				+        for key, length in lengths.items():
			
 
				+            print(f"   - {key}: {length} 个")
			
 
				+        return False, 0
			
 
				+
			
 
				+    return True, list(unique_lengths)[0] if unique_lengths else 0
			
 
				+
			
 
				+
			
 
				+def limit_accounts(
			
 
				+    accounts: List[str],
			
 
				+    max_count: int,
			
 
				+    channel_name: str
			
 
				+) -> List[str]:
			
 
				+    """
			
 
				+    限制账号数量
			
 
				+
			
 
				+    当配置的账号数量超过最大限制时，只使用前 N 个账号，
			
 
				+    并输出警告信息。
			
 
				+
			
 
				+    Args:
			
 
				+        accounts: 账号列表
			
 
				+        max_count: 最大账号数量
			
 
				+        channel_name: 渠道名称，用于日志输出
			
 
				+
			
 
				+    Returns:
			
 
				+        限制后的账号列表
			
 
				+
			
 
				+    Examples:
			
 
				+        >>> limit_accounts(["a1", "a2", "a3"], 2, "飞书")
			
 
				+        ⚠️ 飞书 配置了 3 个账号，超过最大限制 2，只使用前 2 个
			
 
				+        ['a1', 'a2']
			
 
				+    """
			
 
				+    if len(accounts) > max_count:
			
 
				+        print(f"⚠️ {channel_name} 配置了 {len(accounts)} 个账号，超过最大限制 {max_count}，只使用前 {max_count} 个")
			
 
				+        print(f"   ⚠️ 警告：如果您是 fork 用户，过多账号可能导致 GitHub Actions 运行时间过长，存在账号风险")
			
 
				+        return accounts[:max_count]
			
 
				+    return accounts
			
 
				+
			
 
				+
			
 
				+def get_account_at_index(accounts: List[str], index: int, default: str = "") -> str:
			
 
				+    """
			
 
				+    安全获取指定索引的账号值
			
 
				+
			
 
				+    当索引超出范围或账号值为空时，返回默认值。
			
 
				+
			
 
				+    Args:
			
 
				+        accounts: 账号列表
			
 
				+        index: 索引
			
 
				+        default: 默认值
			
 
				+
			
 
				+    Returns:
			
 
				+        账号值或默认值
			
 
				+
			
 
				+    Examples:
			
 
				+        >>> get_account_at_index(["a", "b", "c"], 1)
			
 
				+        'b'
			
 
				+        >>> get_account_at_index(["a", "", "c"], 1, "default")
			
 
				+        'default'
			
 
				+        >>> get_account_at_index(["a"], 5, "default")
			
 
				+        'default'
			
 
				+    """
			
 
				+    if index < len(accounts):
			
 
				+        return accounts[index] if accounts[index] else default
			
 
				+    return default
			
--- a/trendradar/core/data.py
+++ b/trendradar/core/data.py
@@ -0,0 +1,291 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+数据处理模块
			
 
				+
			
 
				+提供数据读取、保存和检测功能：
			
 
				+- save_titles_to_file: 保存标题到 TXT 文件
			
 
				+- read_all_today_titles: 从存储后端读取当天所有标题
			
 
				+- detect_latest_new_titles: 检测最新批次的新增标题
			
 
				+
			
 
				+Author: TrendRadar Team
			
 
				+"""
			
 
				+
			
 
				+from pathlib import Path
			
 
				+from typing import Dict, List, Tuple, Optional, Callable
			
 
				+
			
 
				+
			
 
				+def save_titles_to_file(
			
 
				+    results: Dict,
			
 
				+    id_to_name: Dict,
			
 
				+    failed_ids: List,
			
 
				+    output_path: str,
			
 
				+    clean_title_func: Callable[[str], str],
			
 
				+) -> str:
			
 
				+    """
			
 
				+    保存标题到 TXT 文件
			
 
				+
			
 
				+    Args:
			
 
				+        results: 抓取结果 {source_id: {title: title_data}}
			
 
				+        id_to_name: ID 到名称的映射
			
 
				+        failed_ids: 失败的 ID 列表
			
 
				+        output_path: 输出文件路径
			
 
				+        clean_title_func: 标题清理函数
			
 
				+
			
 
				+    Returns:
			
 
				+        str: 保存的文件路径
			
 
				+    """
			
 
				+    # 确保目录存在
			
 
				+    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+    with open(output_path, "w", encoding="utf-8") as f:
			
 
				+        for id_value, title_data in results.items():
			
 
				+            # id | name 或 id
			
 
				+            name = id_to_name.get(id_value)
			
 
				+            if name and name != id_value:
			
 
				+                f.write(f"{id_value} | {name}\n")
			
 
				+            else:
			
 
				+                f.write(f"{id_value}\n")
			
 
				+
			
 
				+            # 按排名排序标题
			
 
				+            sorted_titles = []
			
 
				+            for title, info in title_data.items():
			
 
				+                cleaned_title = clean_title_func(title)
			
 
				+                if isinstance(info, dict):
			
 
				+                    ranks = info.get("ranks", [])
			
 
				+                    url = info.get("url", "")
			
 
				+                    mobile_url = info.get("mobileUrl", "")
			
 
				+                else:
			
 
				+                    ranks = info if isinstance(info, list) else []
			
 
				+                    url = ""
			
 
				+                    mobile_url = ""
			
 
				+
			
 
				+                rank = ranks[0] if ranks else 1
			
 
				+                sorted_titles.append((rank, cleaned_title, url, mobile_url))
			
 
				+
			
 
				+            sorted_titles.sort(key=lambda x: x[0])
			
 
				+
			
 
				+            for rank, cleaned_title, url, mobile_url in sorted_titles:
			
 
				+                line = f"{rank}. {cleaned_title}"
			
 
				+
			
 
				+                if url:
			
 
				+                    line += f" [URL:{url}]"
			
 
				+                if mobile_url:
			
 
				+                    line += f" [MOBILE:{mobile_url}]"
			
 
				+                f.write(line + "\n")
			
 
				+
			
 
				+            f.write("\n")
			
 
				+
			
 
				+        if failed_ids:
			
 
				+            f.write("==== 以下ID请求失败 ====\n")
			
 
				+            for id_value in failed_ids:
			
 
				+                f.write(f"{id_value}\n")
			
 
				+
			
 
				+    return output_path
			
 
				+
			
 
				+
			
 
				+def read_all_today_titles_from_storage(
			
 
				+    storage_manager,
			
 
				+    current_platform_ids: Optional[List[str]] = None,
			
 
				+) -> Tuple[Dict, Dict, Dict]:
			
 
				+    """
			
 
				+    从存储后端读取当天所有标题（SQLite 数据）
			
 
				+
			
 
				+    Args:
			
 
				+        storage_manager: 存储管理器实例
			
 
				+        current_platform_ids: 当前监控的平台 ID 列表（用于过滤）
			
 
				+
			
 
				+    Returns:
			
 
				+        Tuple[Dict, Dict, Dict]: (all_results, id_to_name, title_info)
			
 
				+    """
			
 
				+    try:
			
 
				+        news_data = storage_manager.get_today_all_data()
			
 
				+
			
 
				+        if not news_data or not news_data.items:
			
 
				+            return {}, {}, {}
			
 
				+
			
 
				+        all_results = {}
			
 
				+        final_id_to_name = {}
			
 
				+        title_info = {}
			
 
				+
			
 
				+        for source_id, news_list in news_data.items.items():
			
 
				+            # 按平台过滤
			
 
				+            if current_platform_ids is not None and source_id not in current_platform_ids:
			
 
				+                continue
			
 
				+
			
 
				+            # 获取来源名称
			
 
				+            source_name = news_data.id_to_name.get(source_id, source_id)
			
 
				+            final_id_to_name[source_id] = source_name
			
 
				+
			
 
				+            if source_id not in all_results:
			
 
				+                all_results[source_id] = {}
			
 
				+                title_info[source_id] = {}
			
 
				+
			
 
				+            for item in news_list:
			
 
				+                title = item.title
			
 
				+                ranks = getattr(item, 'ranks', [item.rank])
			
 
				+                first_time = getattr(item, 'first_time', item.crawl_time)
			
 
				+                last_time = getattr(item, 'last_time', item.crawl_time)
			
 
				+                count = getattr(item, 'count', 1)
			
 
				+
			
 
				+                all_results[source_id][title] = {
			
 
				+                    "ranks": ranks,
			
 
				+                    "url": item.url or "",
			
 
				+                    "mobileUrl": item.mobile_url or "",
			
 
				+                }
			
 
				+
			
 
				+                title_info[source_id][title] = {
			
 
				+                    "first_time": first_time,
			
 
				+                    "last_time": last_time,
			
 
				+                    "count": count,
			
 
				+                    "ranks": ranks,
			
 
				+                    "url": item.url or "",
			
 
				+                    "mobileUrl": item.mobile_url or "",
			
 
				+                }
			
 
				+
			
 
				+        return all_results, final_id_to_name, title_info
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(f"[存储] 从存储后端读取数据失败: {e}")
			
 
				+        return {}, {}, {}
			
 
				+
			
 
				+
			
 
				+def read_all_today_titles(
			
 
				+    storage_manager,
			
 
				+    current_platform_ids: Optional[List[str]] = None,
			
 
				+) -> Tuple[Dict, Dict, Dict]:
			
 
				+    """
			
 
				+    读取当天所有标题（从存储后端）
			
 
				+
			
 
				+    Args:
			
 
				+        storage_manager: 存储管理器实例
			
 
				+        current_platform_ids: 当前监控的平台 ID 列表（用于过滤）
			
 
				+
			
 
				+    Returns:
			
 
				+        Tuple[Dict, Dict, Dict]: (all_results, id_to_name, title_info)
			
 
				+    """
			
 
				+    all_results, final_id_to_name, title_info = read_all_today_titles_from_storage(
			
 
				+        storage_manager, current_platform_ids
			
 
				+    )
			
 
				+
			
 
				+    if all_results:
			
 
				+        total_count = sum(len(titles) for titles in all_results.values())
			
 
				+        print(f"[存储] 已从存储后端读取 {total_count} 条标题")
			
 
				+    else:
			
 
				+        print("[存储] 当天暂无数据")
			
 
				+
			
 
				+    return all_results, final_id_to_name, title_info
			
 
				+
			
 
				+
			
 
				+def detect_latest_new_titles_from_storage(
			
 
				+    storage_manager,
			
 
				+    current_platform_ids: Optional[List[str]] = None,
			
 
				+) -> Dict:
			
 
				+    """
			
 
				+    从存储后端检测最新批次的新增标题
			
 
				+
			
 
				+    Args:
			
 
				+        storage_manager: 存储管理器实例
			
 
				+        current_platform_ids: 当前监控的平台 ID 列表（用于过滤）
			
 
				+
			
 
				+    Returns:
			
 
				+        Dict: 新增标题 {source_id: {title: title_data}}
			
 
				+    """
			
 
				+    try:
			
 
				+        # 获取最新抓取数据
			
 
				+        latest_data = storage_manager.get_latest_crawl_data()
			
 
				+        if not latest_data or not latest_data.items:
			
 
				+            return {}
			
 
				+
			
 
				+        # 获取所有历史数据
			
 
				+        all_data = storage_manager.get_today_all_data()
			
 
				+        if not all_data or not all_data.items:
			
 
				+            # 没有历史数据（第一次抓取），不应该有"新增"标题
			
 
				+            return {}
			
 
				+
			
 
				+        # 收集历史标题（不包括最新批次的时间）
			
 
				+        latest_time = latest_data.crawl_time
			
 
				+        historical_titles = {}
			
 
				+
			
 
				+        for source_id, news_list in all_data.items.items():
			
 
				+            if current_platform_ids is not None and source_id not in current_platform_ids:
			
 
				+                continue
			
 
				+
			
 
				+            historical_titles[source_id] = set()
			
 
				+            for item in news_list:
			
 
				+                # 只统计非最新批次的标题
			
 
				+                first_time = getattr(item, 'first_time', item.crawl_time)
			
 
				+                if first_time != latest_time:
			
 
				+                    historical_titles[source_id].add(item.title)
			
 
				+
			
 
				+        # 检查是否是当天第一次抓取（没有任何历史标题）
			
 
				+        # 如果所有平台的历史标题集合都为空，说明只有一个抓取批次，不应该有"新增"标题
			
 
				+        has_historical_data = any(len(titles) > 0 for titles in historical_titles.values())
			
 
				+        if not has_historical_data:
			
 
				+            return {}
			
 
				+
			
 
				+        # 找出新增标题
			
 
				+        new_titles = {}
			
 
				+        for source_id, news_list in latest_data.items.items():
			
 
				+            if current_platform_ids is not None and source_id not in current_platform_ids:
			
 
				+                continue
			
 
				+
			
 
				+            historical_set = historical_titles.get(source_id, set())
			
 
				+            source_new_titles = {}
			
 
				+
			
 
				+            for item in news_list:
			
 
				+                if item.title not in historical_set:
			
 
				+                    source_new_titles[item.title] = {
			
 
				+                        "ranks": [item.rank],
			
 
				+                        "url": item.url or "",
			
 
				+                        "mobileUrl": item.mobile_url or "",
			
 
				+                    }
			
 
				+
			
 
				+            if source_new_titles:
			
 
				+                new_titles[source_id] = source_new_titles
			
 
				+
			
 
				+        return new_titles
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        print(f"[存储] 从存储后端检测新标题失败: {e}")
			
 
				+        return {}
			
 
				+
			
 
				+
			
 
				+def detect_latest_new_titles(
			
 
				+    storage_manager,
			
 
				+    current_platform_ids: Optional[List[str]] = None,
			
 
				+) -> Dict:
			
 
				+    """
			
 
				+    检测当日最新批次的新增标题（从存储后端）
			
 
				+
			
 
				+    Args:
			
 
				+        storage_manager: 存储管理器实例
			
 
				+        current_platform_ids: 当前监控的平台 ID 列表（用于过滤）
			
 
				+
			
 
				+    Returns:
			
 
				+        Dict: 新增标题 {source_id: {title: title_data}}
			
 
				+    """
			
 
				+    new_titles = detect_latest_new_titles_from_storage(storage_manager, current_platform_ids)
			
 
				+    if new_titles:
			
 
				+        total_new = sum(len(titles) for titles in new_titles.values())
			
 
				+        print(f"[存储] 从存储后端检测到 {total_new} 条新增标题")
			
 
				+    return new_titles
			
 
				+
			
 
				+
			
 
				+def is_first_crawl_today(output_dir: str, date_folder: str) -> bool:
			
 
				+    """
			
 
				+    检测是否是当天第一次爬取
			
 
				+
			
 
				+    Args:
			
 
				+        output_dir: 输出目录
			
 
				+        date_folder: 日期文件夹名称
			
 
				+
			
 
				+    Returns:
			
 
				+        bool: 是否是当天第一次爬取
			
 
				+    """
			
 
				+    txt_dir = Path(output_dir) / date_folder / "txt"
			
 
				+
			
 
				+    if not txt_dir.exists():
			
 
				+        return True
			
 
				+
			
 
				+    files = sorted([f for f in txt_dir.iterdir() if f.suffix == ".txt"])
			
 
				+    return len(files) <= 1
			
--- a/trendradar/core/frequency.py
+++ b/trendradar/core/frequency.py
@@ -0,0 +1,194 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+频率词配置加载模块
			
 
				+
			
 
				+负责从配置文件加载频率词规则，支持：
			
 
				+- 普通词组
			
 
				+- 必须词（+前缀）
			
 
				+- 过滤词（!前缀）
			
 
				+- 全局过滤词（[GLOBAL_FILTER] 区域）
			
 
				+- 最大显示数量（@前缀）
			
 
				+"""
			
 
				+
			
 
				+import os
			
 
				+from pathlib import Path
			
 
				+from typing import Dict, List, Tuple, Optional
			
 
				+
			
 
				+
			
 
				+def load_frequency_words(
			
 
				+    frequency_file: Optional[str] = None,
			
 
				+) -> Tuple[List[Dict], List[str], List[str]]:
			
 
				+    """
			
 
				+    加载频率词配置
			
 
				+
			
 
				+    配置文件格式说明：
			
 
				+    - 每个词组由空行分隔
			
 
				+    - [GLOBAL_FILTER] 区域定义全局过滤词
			
 
				+    - [WORD_GROUPS] 区域定义词组（默认）
			
 
				+
			
 
				+    词组语法：
			
 
				+    - 普通词：直接写入，任意匹配即可
			
 
				+    - +词：必须词，所有必须词都要匹配
			
 
				+    - !词：过滤词，匹配则排除
			
 
				+    - @数字：该词组最多显示的条数
			
 
				+
			
 
				+    Args:
			
 
				+        frequency_file: 频率词配置文件路径，默认从环境变量 FREQUENCY_WORDS_PATH 获取或使用 config/frequency_words.txt
			
 
				+
			
 
				+    Returns:
			
 
				+        (词组列表, 词组内过滤词, 全局过滤词)
			
 
				+
			
 
				+    Raises:
			
 
				+        FileNotFoundError: 频率词文件不存在
			
 
				+    """
			
 
				+    if frequency_file is None:
			
 
				+        frequency_file = os.environ.get(
			
 
				+            "FREQUENCY_WORDS_PATH", "config/frequency_words.txt"
			
 
				+        )
			
 
				+
			
 
				+    frequency_path = Path(frequency_file)
			
 
				+    if not frequency_path.exists():
			
 
				+        raise FileNotFoundError(f"频率词文件 {frequency_file} 不存在")
			
 
				+
			
 
				+    with open(frequency_path, "r", encoding="utf-8") as f:
			
 
				+        content = f.read()
			
 
				+
			
 
				+    word_groups = [group.strip() for group in content.split("\n\n") if group.strip()]
			
 
				+
			
 
				+    processed_groups = []
			
 
				+    filter_words = []
			
 
				+    global_filters = []
			
 
				+
			
 
				+    # 默认区域（向后兼容）
			
 
				+    current_section = "WORD_GROUPS"
			
 
				+
			
 
				+    for group in word_groups:
			
 
				+        lines = [line.strip() for line in group.split("\n") if line.strip()]
			
 
				+
			
 
				+        if not lines:
			
 
				+            continue
			
 
				+
			
 
				+        # 检查是否为区域标记
			
 
				+        if lines[0].startswith("[") and lines[0].endswith("]"):
			
 
				+            section_name = lines[0][1:-1].upper()
			
 
				+            if section_name in ("GLOBAL_FILTER", "WORD_GROUPS"):
			
 
				+                current_section = section_name
			
 
				+                lines = lines[1:]  # 移除标记行
			
 
				+
			
 
				+        # 处理全局过滤区域
			
 
				+        if current_section == "GLOBAL_FILTER":
			
 
				+            # 直接添加所有非空行到全局过滤列表
			
 
				+            for line in lines:
			
 
				+                # 忽略特殊语法前缀，只提取纯文本
			
 
				+                if line.startswith(("!", "+", "@")):
			
 
				+                    continue  # 全局过滤区不支持特殊语法
			
 
				+                if line:
			
 
				+                    global_filters.append(line)
			
 
				+            continue
			
 
				+
			
 
				+        # 处理词组区域
			
 
				+        words = lines
			
 
				+
			
 
				+        group_required_words = []
			
 
				+        group_normal_words = []
			
 
				+        group_filter_words = []
			
 
				+        group_max_count = 0  # 默认不限制
			
 
				+
			
 
				+        for word in words:
			
 
				+            if word.startswith("@"):
			
 
				+                # 解析最大显示数量（只接受正整数）
			
 
				+                try:
			
 
				+                    count = int(word[1:])
			
 
				+                    if count > 0:
			
 
				+                        group_max_count = count
			
 
				+                except (ValueError, IndexError):
			
 
				+                    pass  # 忽略无效的@数字格式
			
 
				+            elif word.startswith("!"):
			
 
				+                filter_words.append(word[1:])
			
 
				+                group_filter_words.append(word[1:])
			
 
				+            elif word.startswith("+"):
			
 
				+                group_required_words.append(word[1:])
			
 
				+            else:
			
 
				+                group_normal_words.append(word)
			
 
				+
			
 
				+        if group_required_words or group_normal_words:
			
 
				+            if group_normal_words:
			
 
				+                group_key = " ".join(group_normal_words)
			
 
				+            else:
			
 
				+                group_key = " ".join(group_required_words)
			
 
				+
			
 
				+            processed_groups.append(
			
 
				+                {
			
 
				+                    "required": group_required_words,
			
 
				+                    "normal": group_normal_words,
			
 
				+                    "group_key": group_key,
			
 
				+                    "max_count": group_max_count,
			
 
				+                }
			
 
				+            )
			
 
				+
			
 
				+    return processed_groups, filter_words, global_filters
			
 
				+
			
 
				+
			
 
				+def matches_word_groups(
			
 
				+    title: str,
			
 
				+    word_groups: List[Dict],
			
 
				+    filter_words: List[str],
			
 
				+    global_filters: Optional[List[str]] = None
			
 
				+) -> bool:
			
 
				+    """
			
 
				+    检查标题是否匹配词组规则
			
 
				+
			
 
				+    Args:
			
 
				+        title: 标题文本
			
 
				+        word_groups: 词组列表
			
 
				+        filter_words: 过滤词列表
			
 
				+        global_filters: 全局过滤词列表
			
 
				+
			
 
				+    Returns:
			
 
				+        是否匹配
			
 
				+    """
			
 
				+    # 防御性类型检查：确保 title 是有效字符串
			
 
				+    if not isinstance(title, str):
			
 
				+        title = str(title) if title is not None else ""
			
 
				+    if not title.strip():
			
 
				+        return False
			
 
				+
			
 
				+    title_lower = title.lower()
			
 
				+
			
 
				+    # 全局过滤检查（优先级最高）
			
 
				+    if global_filters:
			
 
				+        if any(global_word.lower() in title_lower for global_word in global_filters):
			
 
				+            return False
			
 
				+
			
 
				+    # 如果没有配置词组，则匹配所有标题（支持显示全部新闻）
			
 
				+    if not word_groups:
			
 
				+        return True
			
 
				+
			
 
				+    # 过滤词检查
			
 
				+    if any(filter_word.lower() in title_lower for filter_word in filter_words):
			
 
				+        return False
			
 
				+
			
 
				+    # 词组匹配检查
			
 
				+    for group in word_groups:
			
 
				+        required_words = group["required"]
			
 
				+        normal_words = group["normal"]
			
 
				+
			
 
				+        # 必须词检查
			
 
				+        if required_words:
			
 
				+            all_required_present = all(
			
 
				+                req_word.lower() in title_lower for req_word in required_words
			
 
				+            )
			
 
				+            if not all_required_present:
			
 
				+                continue
			
 
				+
			
 
				+        # 普通词检查
			
 
				+        if normal_words:
			
 
				+            any_normal_present = any(
			
 
				+                normal_word.lower() in title_lower for normal_word in normal_words
			
 
				+            )
			
 
				+            if not any_normal_present:
			
 
				+                continue
			
 
				+
			
 
				+        return True
			
 
				+
			
 
				+    return False
			
--- a/trendradar/core/loader.py
+++ b/trendradar/core/loader.py
@@ -0,0 +1,332 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+配置加载模块
			
 
				+
			
 
				+负责从 YAML 配置文件和环境变量加载配置。
			
 
				+"""
			
 
				+
			
 
				+import os
			
 
				+from pathlib import Path
			
 
				+from typing import Dict, Any, Optional
			
 
				+
			
 
				+import yaml
			
 
				+
			
 
				+from .config import parse_multi_account_config, validate_paired_configs
			
 
				+
			
 
				+
			
 
				+def _get_env_bool(key: str, default: bool = False) -> Optional[bool]:
			
 
				+    """从环境变量获取布尔值，如果未设置返回 None"""
			
 
				+    value = os.environ.get(key, "").strip().lower()
			
 
				+    if not value:
			
 
				+        return None
			
 
				+    return value in ("true", "1")
			
 
				+
			
 
				+
			
 
				+def _get_env_int(key: str, default: int = 0) -> int:
			
 
				+    """从环境变量获取整数值"""
			
 
				+    value = os.environ.get(key, "").strip()
			
 
				+    if not value:
			
 
				+        return default
			
 
				+    try:
			
 
				+        return int(value)
			
 
				+    except ValueError:
			
 
				+        return default
			
 
				+
			
 
				+
			
 
				+def _get_env_str(key: str, default: str = "") -> str:
			
 
				+    """从环境变量获取字符串值"""
			
 
				+    return os.environ.get(key, "").strip() or default
			
 
				+
			
 
				+
			
 
				+def _load_app_config(config_data: Dict) -> Dict:
			
 
				+    """加载应用配置"""
			
 
				+    app_config = config_data.get("app", {})
			
 
				+    return {
			
 
				+        "VERSION_CHECK_URL": app_config.get("version_check_url", ""),
			
 
				+        "SHOW_VERSION_UPDATE": app_config.get("show_version_update", True),
			
 
				+        "TIMEZONE": _get_env_str("TIMEZONE") or app_config.get("timezone", "Asia/Shanghai"),
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def _load_crawler_config(config_data: Dict) -> Dict:
			
 
				+    """加载爬虫配置"""
			
 
				+    crawler_config = config_data.get("crawler", {})
			
 
				+    enable_crawler_env = _get_env_bool("ENABLE_CRAWLER")
			
 
				+    return {
			
 
				+        "REQUEST_INTERVAL": crawler_config.get("request_interval", 100),
			
 
				+        "USE_PROXY": crawler_config.get("use_proxy", False),
			
 
				+        "DEFAULT_PROXY": crawler_config.get("default_proxy", ""),
			
 
				+        "ENABLE_CRAWLER": enable_crawler_env if enable_crawler_env is not None else crawler_config.get("enable_crawler", True),
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def _load_report_config(config_data: Dict) -> Dict:
			
 
				+    """加载报告配置"""
			
 
				+    report_config = config_data.get("report", {})
			
 
				+
			
 
				+    # 环境变量覆盖
			
 
				+    sort_by_position_env = _get_env_bool("SORT_BY_POSITION_FIRST")
			
 
				+    reverse_content_env = _get_env_bool("REVERSE_CONTENT_ORDER")
			
 
				+    max_news_env = _get_env_int("MAX_NEWS_PER_KEYWORD")
			
 
				+
			
 
				+    return {
			
 
				+        "REPORT_MODE": _get_env_str("REPORT_MODE") or report_config.get("mode", "daily"),
			
 
				+        "RANK_THRESHOLD": report_config.get("rank_threshold", 10),
			
 
				+        "SORT_BY_POSITION_FIRST": sort_by_position_env if sort_by_position_env is not None else report_config.get("sort_by_position_first", False),
			
 
				+        "MAX_NEWS_PER_KEYWORD": max_news_env or report_config.get("max_news_per_keyword", 0),
			
 
				+        "REVERSE_CONTENT_ORDER": reverse_content_env if reverse_content_env is not None else report_config.get("reverse_content_order", False),
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def _load_notification_config(config_data: Dict) -> Dict:
			
 
				+    """加载通知配置"""
			
 
				+    notification = config_data.get("notification", {})
			
 
				+    enable_notification_env = _get_env_bool("ENABLE_NOTIFICATION")
			
 
				+
			
 
				+    return {
			
 
				+        "ENABLE_NOTIFICATION": enable_notification_env if enable_notification_env is not None else notification.get("enable_notification", True),
			
 
				+        "MESSAGE_BATCH_SIZE": notification.get("message_batch_size", 4000),
			
 
				+        "DINGTALK_BATCH_SIZE": notification.get("dingtalk_batch_size", 20000),
			
 
				+        "FEISHU_BATCH_SIZE": notification.get("feishu_batch_size", 29000),
			
 
				+        "BARK_BATCH_SIZE": notification.get("bark_batch_size", 3600),
			
 
				+        "SLACK_BATCH_SIZE": notification.get("slack_batch_size", 4000),
			
 
				+        "BATCH_SEND_INTERVAL": notification.get("batch_send_interval", 1.0),
			
 
				+        "FEISHU_MESSAGE_SEPARATOR": notification.get("feishu_message_separator", "---"),
			
 
				+        "MAX_ACCOUNTS_PER_CHANNEL": _get_env_int("MAX_ACCOUNTS_PER_CHANNEL") or notification.get("max_accounts_per_channel", 3),
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def _load_push_window_config(config_data: Dict) -> Dict:
			
 
				+    """加载推送窗口配置"""
			
 
				+    notification = config_data.get("notification", {})
			
 
				+    push_window = notification.get("push_window", {})
			
 
				+    time_range = push_window.get("time_range", {})
			
 
				+
			
 
				+    enabled_env = _get_env_bool("PUSH_WINDOW_ENABLED")
			
 
				+    once_per_day_env = _get_env_bool("PUSH_WINDOW_ONCE_PER_DAY")
			
 
				+
			
 
				+    return {
			
 
				+        "ENABLED": enabled_env if enabled_env is not None else push_window.get("enabled", False),
			
 
				+        "TIME_RANGE": {
			
 
				+            "START": _get_env_str("PUSH_WINDOW_START") or time_range.get("start", "08:00"),
			
 
				+            "END": _get_env_str("PUSH_WINDOW_END") or time_range.get("end", "22:00"),
			
 
				+        },
			
 
				+        "ONCE_PER_DAY": once_per_day_env if once_per_day_env is not None else push_window.get("once_per_day", True),
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def _load_weight_config(config_data: Dict) -> Dict:
			
 
				+    """加载权重配置"""
			
 
				+    weight = config_data.get("weight", {})
			
 
				+    return {
			
 
				+        "RANK_WEIGHT": weight.get("rank_weight", 1.0),
			
 
				+        "FREQUENCY_WEIGHT": weight.get("frequency_weight", 1.0),
			
 
				+        "HOTNESS_WEIGHT": weight.get("hotness_weight", 1.0),
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def _load_storage_config(config_data: Dict) -> Dict:
			
 
				+    """加载存储配置"""
			
 
				+    storage = config_data.get("storage", {})
			
 
				+    formats = storage.get("formats", {})
			
 
				+    local = storage.get("local", {})
			
 
				+    remote = storage.get("remote", {})
			
 
				+    pull = storage.get("pull", {})
			
 
				+
			
 
				+    txt_enabled_env = _get_env_bool("STORAGE_TXT_ENABLED")
			
 
				+    html_enabled_env = _get_env_bool("STORAGE_HTML_ENABLED")
			
 
				+    pull_enabled_env = _get_env_bool("PULL_ENABLED")
			
 
				+
			
 
				+    return {
			
 
				+        "BACKEND": _get_env_str("STORAGE_BACKEND") or storage.get("backend", "auto"),
			
 
				+        "FORMATS": {
			
 
				+            "SQLITE": formats.get("sqlite", True),
			
 
				+            "TXT": txt_enabled_env if txt_enabled_env is not None else formats.get("txt", True),
			
 
				+            "HTML": html_enabled_env if html_enabled_env is not None else formats.get("html", True),
			
 
				+        },
			
 
				+        "LOCAL": {
			
 
				+            "DATA_DIR": local.get("data_dir", "output"),
			
 
				+            "RETENTION_DAYS": _get_env_int("LOCAL_RETENTION_DAYS") or local.get("retention_days", 0),
			
 
				+        },
			
 
				+        "REMOTE": {
			
 
				+            "ENDPOINT_URL": _get_env_str("S3_ENDPOINT_URL") or remote.get("endpoint_url", ""),
			
 
				+            "BUCKET_NAME": _get_env_str("S3_BUCKET_NAME") or remote.get("bucket_name", ""),
			
 
				+            "ACCESS_KEY_ID": _get_env_str("S3_ACCESS_KEY_ID") or remote.get("access_key_id", ""),
			
 
				+            "SECRET_ACCESS_KEY": _get_env_str("S3_SECRET_ACCESS_KEY") or remote.get("secret_access_key", ""),
			
 
				+            "REGION": _get_env_str("S3_REGION") or remote.get("region", ""),
			
 
				+            "RETENTION_DAYS": _get_env_int("REMOTE_RETENTION_DAYS") or remote.get("retention_days", 0),
			
 
				+        },
			
 
				+        "PULL": {
			
 
				+            "ENABLED": pull_enabled_env if pull_enabled_env is not None else pull.get("enabled", False),
			
 
				+            "DAYS": _get_env_int("PULL_DAYS") or pull.get("days", 7),
			
 
				+        },
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def _load_webhook_config(config_data: Dict) -> Dict:
			
 
				+    """加载 Webhook 配置"""
			
 
				+    notification = config_data.get("notification", {})
			
 
				+    webhooks = notification.get("webhooks", {})
			
 
				+
			
 
				+    return {
			
 
				+        # 飞书
			
 
				+        "FEISHU_WEBHOOK_URL": _get_env_str("FEISHU_WEBHOOK_URL") or webhooks.get("feishu_url", ""),
			
 
				+        # 钉钉
			
 
				+        "DINGTALK_WEBHOOK_URL": _get_env_str("DINGTALK_WEBHOOK_URL") or webhooks.get("dingtalk_url", ""),
			
 
				+        # 企业微信
			
 
				+        "WEWORK_WEBHOOK_URL": _get_env_str("WEWORK_WEBHOOK_URL") or webhooks.get("wework_url", ""),
			
 
				+        "WEWORK_MSG_TYPE": _get_env_str("WEWORK_MSG_TYPE") or webhooks.get("wework_msg_type", "markdown"),
			
 
				+        # Telegram
			
 
				+        "TELEGRAM_BOT_TOKEN": _get_env_str("TELEGRAM_BOT_TOKEN") or webhooks.get("telegram_bot_token", ""),
			
 
				+        "TELEGRAM_CHAT_ID": _get_env_str("TELEGRAM_CHAT_ID") or webhooks.get("telegram_chat_id", ""),
			
 
				+        # 邮件
			
 
				+        "EMAIL_FROM": _get_env_str("EMAIL_FROM") or webhooks.get("email_from", ""),
			
 
				+        "EMAIL_PASSWORD": _get_env_str("EMAIL_PASSWORD") or webhooks.get("email_password", ""),
			
 
				+        "EMAIL_TO": _get_env_str("EMAIL_TO") or webhooks.get("email_to", ""),
			
 
				+        "EMAIL_SMTP_SERVER": _get_env_str("EMAIL_SMTP_SERVER") or webhooks.get("email_smtp_server", ""),
			
 
				+        "EMAIL_SMTP_PORT": _get_env_str("EMAIL_SMTP_PORT") or webhooks.get("email_smtp_port", ""),
			
 
				+        # ntfy
			
 
				+        "NTFY_SERVER_URL": _get_env_str("NTFY_SERVER_URL") or webhooks.get("ntfy_server_url") or "https://ntfy.sh",
			
 
				+        "NTFY_TOPIC": _get_env_str("NTFY_TOPIC") or webhooks.get("ntfy_topic", ""),
			
 
				+        "NTFY_TOKEN": _get_env_str("NTFY_TOKEN") or webhooks.get("ntfy_token", ""),
			
 
				+        # Bark
			
 
				+        "BARK_URL": _get_env_str("BARK_URL") or webhooks.get("bark_url", ""),
			
 
				+        # Slack
			
 
				+        "SLACK_WEBHOOK_URL": _get_env_str("SLACK_WEBHOOK_URL") or webhooks.get("slack_webhook_url", ""),
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def _print_notification_sources(config: Dict) -> None:
			
 
				+    """打印通知渠道配置来源信息"""
			
 
				+    notification_sources = []
			
 
				+    max_accounts = config["MAX_ACCOUNTS_PER_CHANNEL"]
			
 
				+
			
 
				+    if config["FEISHU_WEBHOOK_URL"]:
			
 
				+        accounts = parse_multi_account_config(config["FEISHU_WEBHOOK_URL"])
			
 
				+        count = min(len(accounts), max_accounts)
			
 
				+        source = "环境变量" if os.environ.get("FEISHU_WEBHOOK_URL") else "配置文件"
			
 
				+        notification_sources.append(f"飞书({source}, {count}个账号)")
			
 
				+
			
 
				+    if config["DINGTALK_WEBHOOK_URL"]:
			
 
				+        accounts = parse_multi_account_config(config["DINGTALK_WEBHOOK_URL"])
			
 
				+        count = min(len(accounts), max_accounts)
			
 
				+        source = "环境变量" if os.environ.get("DINGTALK_WEBHOOK_URL") else "配置文件"
			
 
				+        notification_sources.append(f"钉钉({source}, {count}个账号)")
			
 
				+
			
 
				+    if config["WEWORK_WEBHOOK_URL"]:
			
 
				+        accounts = parse_multi_account_config(config["WEWORK_WEBHOOK_URL"])
			
 
				+        count = min(len(accounts), max_accounts)
			
 
				+        source = "环境变量" if os.environ.get("WEWORK_WEBHOOK_URL") else "配置文件"
			
 
				+        notification_sources.append(f"企业微信({source}, {count}个账号)")
			
 
				+
			
 
				+    if config["TELEGRAM_BOT_TOKEN"] and config["TELEGRAM_CHAT_ID"]:
			
 
				+        tokens = parse_multi_account_config(config["TELEGRAM_BOT_TOKEN"])
			
 
				+        chat_ids = parse_multi_account_config(config["TELEGRAM_CHAT_ID"])
			
 
				+        valid, count = validate_paired_configs(
			
 
				+            {"bot_token": tokens, "chat_id": chat_ids},
			
 
				+            "Telegram",
			
 
				+            required_keys=["bot_token", "chat_id"]
			
 
				+        )
			
 
				+        if valid and count > 0:
			
 
				+            count = min(count, max_accounts)
			
 
				+            token_source = "环境变量" if os.environ.get("TELEGRAM_BOT_TOKEN") else "配置文件"
			
 
				+            notification_sources.append(f"Telegram({token_source}, {count}个账号)")
			
 
				+
			
 
				+    if config["EMAIL_FROM"] and config["EMAIL_PASSWORD"] and config["EMAIL_TO"]:
			
 
				+        from_source = "环境变量" if os.environ.get("EMAIL_FROM") else "配置文件"
			
 
				+        notification_sources.append(f"邮件({from_source})")
			
 
				+
			
 
				+    if config["NTFY_SERVER_URL"] and config["NTFY_TOPIC"]:
			
 
				+        topics = parse_multi_account_config(config["NTFY_TOPIC"])
			
 
				+        tokens = parse_multi_account_config(config["NTFY_TOKEN"])
			
 
				+        if tokens:
			
 
				+            valid, count = validate_paired_configs(
			
 
				+                {"topic": topics, "token": tokens},
			
 
				+                "ntfy"
			
 
				+            )
			
 
				+            if valid and count > 0:
			
 
				+                count = min(count, max_accounts)
			
 
				+                server_source = "环境变量" if os.environ.get("NTFY_SERVER_URL") else "配置文件"
			
 
				+                notification_sources.append(f"ntfy({server_source}, {count}个账号)")
			
 
				+        else:
			
 
				+            count = min(len(topics), max_accounts)
			
 
				+            server_source = "环境变量" if os.environ.get("NTFY_SERVER_URL") else "配置文件"
			
 
				+            notification_sources.append(f"ntfy({server_source}, {count}个账号)")
			
 
				+
			
 
				+    if config["BARK_URL"]:
			
 
				+        accounts = parse_multi_account_config(config["BARK_URL"])
			
 
				+        count = min(len(accounts), max_accounts)
			
 
				+        bark_source = "环境变量" if os.environ.get("BARK_URL") else "配置文件"
			
 
				+        notification_sources.append(f"Bark({bark_source}, {count}个账号)")
			
 
				+
			
 
				+    if config["SLACK_WEBHOOK_URL"]:
			
 
				+        accounts = parse_multi_account_config(config["SLACK_WEBHOOK_URL"])
			
 
				+        count = min(len(accounts), max_accounts)
			
 
				+        slack_source = "环境变量" if os.environ.get("SLACK_WEBHOOK_URL") else "配置文件"
			
 
				+        notification_sources.append(f"Slack({slack_source}, {count}个账号)")
			
 
				+
			
 
				+    if notification_sources:
			
 
				+        print(f"通知渠道配置来源: {', '.join(notification_sources)}")
			
 
				+        print(f"每个渠道最大账号数: {max_accounts}")
			
 
				+    else:
			
 
				+        print("未配置任何通知渠道")
			
 
				+
			
 
				+
			
 
				+def load_config(config_path: Optional[str] = None) -> Dict[str, Any]:
			
 
				+    """
			
 
				+    加载配置文件
			
 
				+
			
 
				+    Args:
			
 
				+        config_path: 配置文件路径，默认从环境变量 CONFIG_PATH 获取或使用 config/config.yaml
			
 
				+
			
 
				+    Returns:
			
 
				+        包含所有配置的字典
			
 
				+
			
 
				+    Raises:
			
 
				+        FileNotFoundError: 配置文件不存在
			
 
				+    """
			
 
				+    if config_path is None:
			
 
				+        config_path = os.environ.get("CONFIG_PATH", "config/config.yaml")
			
 
				+
			
 
				+    if not Path(config_path).exists():
			
 
				+        raise FileNotFoundError(f"配置文件 {config_path} 不存在")
			
 
				+
			
 
				+    with open(config_path, "r", encoding="utf-8") as f:
			
 
				+        config_data = yaml.safe_load(f)
			
 
				+
			
 
				+    print(f"配置文件加载成功: {config_path}")
			
 
				+
			
 
				+    # 合并所有配置
			
 
				+    config = {}
			
 
				+
			
 
				+    # 应用配置
			
 
				+    config.update(_load_app_config(config_data))
			
 
				+
			
 
				+    # 爬虫配置
			
 
				+    config.update(_load_crawler_config(config_data))
			
 
				+
			
 
				+    # 报告配置
			
 
				+    config.update(_load_report_config(config_data))
			
 
				+
			
 
				+    # 通知配置
			
 
				+    config.update(_load_notification_config(config_data))
			
 
				+
			
 
				+    # 推送窗口配置
			
 
				+    config["PUSH_WINDOW"] = _load_push_window_config(config_data)
			
 
				+
			
 
				+    # 权重配置
			
 
				+    config["WEIGHT_CONFIG"] = _load_weight_config(config_data)
			
 
				+
			
 
				+    # 平台配置
			
 
				+    config["PLATFORMS"] = config_data.get("platforms", [])
			
 
				+
			
 
				+    # 存储配置
			
 
				+    config["STORAGE"] = _load_storage_config(config_data)
			
 
				+
			
 
				+    # Webhook 配置
			
 
				+    config.update(_load_webhook_config(config_data))
			
 
				+
			
 
				+    # 打印通知渠道配置来源
			
 
				+    _print_notification_sources(config)
			
 
				+
			
 
				+    return config
			
--- a/trendradar/crawler/__init__.py
+++ b/trendradar/crawler/__init__.py
@@ -0,0 +1,8 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+爬虫模块 - 数据抓取功能
			
 
				+"""
			
 
				+
			
 
				+from trendradar.crawler.fetcher import DataFetcher
			
 
				+
			
 
				+__all__ = ["DataFetcher"]
			
--- a/trendradar/crawler/fetcher.py
+++ b/trendradar/crawler/fetcher.py
@@ -0,0 +1,184 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+数据获取器模块
			
 
				+
			
 
				+负责从 NewsNow API 抓取新闻数据，支持：
			
 
				+- 单个平台数据获取
			
 
				+- 批量平台数据爬取
			
 
				+- 自动重试机制
			
 
				+- 代理支持
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+import random
			
 
				+import time
			
 
				+from typing import Dict, List, Tuple, Optional, Union
			
 
				+
			
 
				+import requests
			
 
				+
			
 
				+
			
 
				+class DataFetcher:
			
 
				+    """数据获取器"""
			
 
				+
			
 
				+    # 默认 API 地址
			
 
				+    DEFAULT_API_URL = "https://newsnow.busiyi.world/api/s"
			
 
				+
			
 
				+    # 默认请求头
			
 
				+    DEFAULT_HEADERS = {
			
 
				+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
			
 
				+        "Accept": "application/json, text/plain, */*",
			
 
				+        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
			
 
				+        "Connection": "keep-alive",
			
 
				+        "Cache-Control": "no-cache",
			
 
				+    }
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        proxy_url: Optional[str] = None,
			
 
				+        api_url: Optional[str] = None,
			
 
				+    ):
			
 
				+        """
			
 
				+        初始化数据获取器
			
 
				+
			
 
				+        Args:
			
 
				+            proxy_url: 代理服务器 URL（可选）
			
 
				+            api_url: API 基础 URL（可选，默认使用 DEFAULT_API_URL）
			
 
				+        """
			
 
				+        self.proxy_url = proxy_url
			
 
				+        self.api_url = api_url or self.DEFAULT_API_URL
			
 
				+
			
 
				+    def fetch_data(
			
 
				+        self,
			
 
				+        id_info: Union[str, Tuple[str, str]],
			
 
				+        max_retries: int = 2,
			
 
				+        min_retry_wait: int = 3,
			
 
				+        max_retry_wait: int = 5,
			
 
				+    ) -> Tuple[Optional[str], str, str]:
			
 
				+        """
			
 
				+        获取指定ID数据，支持重试
			
 
				+
			
 
				+        Args:
			
 
				+            id_info: 平台ID 或 (平台ID, 别名) 元组
			
 
				+            max_retries: 最大重试次数
			
 
				+            min_retry_wait: 最小重试等待时间（秒）
			
 
				+            max_retry_wait: 最大重试等待时间（秒）
			
 
				+
			
 
				+        Returns:
			
 
				+            (响应文本, 平台ID, 别名) 元组，失败时响应文本为 None
			
 
				+        """
			
 
				+        if isinstance(id_info, tuple):
			
 
				+            id_value, alias = id_info
			
 
				+        else:
			
 
				+            id_value = id_info
			
 
				+            alias = id_value
			
 
				+
			
 
				+        url = f"{self.api_url}?id={id_value}&latest"
			
 
				+
			
 
				+        proxies = None
			
 
				+        if self.proxy_url:
			
 
				+            proxies = {"http": self.proxy_url, "https": self.proxy_url}
			
 
				+
			
 
				+        retries = 0
			
 
				+        while retries <= max_retries:
			
 
				+            try:
			
 
				+                response = requests.get(
			
 
				+                    url,
			
 
				+                    proxies=proxies,
			
 
				+                    headers=self.DEFAULT_HEADERS,
			
 
				+                    timeout=10,
			
 
				+                )
			
 
				+                response.raise_for_status()
			
 
				+
			
 
				+                data_text = response.text
			
 
				+                data_json = json.loads(data_text)
			
 
				+
			
 
				+                status = data_json.get("status", "未知")
			
 
				+                if status not in ["success", "cache"]:
			
 
				+                    raise ValueError(f"响应状态异常: {status}")
			
 
				+
			
 
				+                status_info = "最新数据" if status == "success" else "缓存数据"
			
 
				+                print(f"获取 {id_value} 成功（{status_info}）")
			
 
				+                return data_text, id_value, alias
			
 
				+
			
 
				+            except Exception as e:
			
 
				+                retries += 1
			
 
				+                if retries <= max_retries:
			
 
				+                    base_wait = random.uniform(min_retry_wait, max_retry_wait)
			
 
				+                    additional_wait = (retries - 1) * random.uniform(1, 2)
			
 
				+                    wait_time = base_wait + additional_wait
			
 
				+                    print(f"请求 {id_value} 失败: {e}. {wait_time:.2f}秒后重试...")
			
 
				+                    time.sleep(wait_time)
			
 
				+                else:
			
 
				+                    print(f"请求 {id_value} 失败: {e}")
			
 
				+                    return None, id_value, alias
			
 
				+
			
 
				+        return None, id_value, alias
			
 
				+
			
 
				+    def crawl_websites(
			
 
				+        self,
			
 
				+        ids_list: List[Union[str, Tuple[str, str]]],
			
 
				+        request_interval: int = 100,
			
 
				+    ) -> Tuple[Dict, Dict, List]:
			
 
				+        """
			
 
				+        爬取多个网站数据
			
 
				+
			
 
				+        Args:
			
 
				+            ids_list: 平台ID列表，每个元素可以是字符串或 (平台ID, 别名) 元组
			
 
				+            request_interval: 请求间隔（毫秒）
			
 
				+
			
 
				+        Returns:
			
 
				+            (结果字典, ID到名称的映射, 失败ID列表) 元组
			
 
				+        """
			
 
				+        results = {}
			
 
				+        id_to_name = {}
			
 
				+        failed_ids = []
			
 
				+
			
 
				+        for i, id_info in enumerate(ids_list):
			
 
				+            if isinstance(id_info, tuple):
			
 
				+                id_value, name = id_info
			
 
				+            else:
			
 
				+                id_value = id_info
			
 
				+                name = id_value
			
 
				+
			
 
				+            id_to_name[id_value] = name
			
 
				+            response, _, _ = self.fetch_data(id_info)
			
 
				+
			
 
				+            if response:
			
 
				+                try:
			
 
				+                    data = json.loads(response)
			
 
				+                    results[id_value] = {}
			
 
				+
			
 
				+                    for index, item in enumerate(data.get("items", []), 1):
			
 
				+                        title = item.get("title")
			
 
				+                        # 跳过无效标题（None、float、空字符串）
			
 
				+                        if title is None or isinstance(title, float) or not str(title).strip():
			
 
				+                            continue
			
 
				+                        title = str(title).strip()
			
 
				+                        url = item.get("url", "")
			
 
				+                        mobile_url = item.get("mobileUrl", "")
			
 
				+
			
 
				+                        if title in results[id_value]:
			
 
				+                            results[id_value][title]["ranks"].append(index)
			
 
				+                        else:
			
 
				+                            results[id_value][title] = {
			
 
				+                                "ranks": [index],
			
 
				+                                "url": url,
			
 
				+                                "mobileUrl": mobile_url,
			
 
				+                            }
			
 
				+                except json.JSONDecodeError:
			
 
				+                    print(f"解析 {id_value} 响应失败")
			
 
				+                    failed_ids.append(id_value)
			
 
				+                except Exception as e:
			
 
				+                    print(f"处理 {id_value} 数据出错: {e}")
			
 
				+                    failed_ids.append(id_value)
			
 
				+            else:
			
 
				+                failed_ids.append(id_value)
			
 
				+
			
 
				+            # 请求间隔（除了最后一个）
			
 
				+            if i < len(ids_list) - 1:
			
 
				+                actual_interval = request_interval + random.randint(-10, 20)
			
 
				+                actual_interval = max(50, actual_interval)
			
 
				+                time.sleep(actual_interval / 1000)
			
 
				+
			
 
				+        print(f"成功: {list(results.keys())}, 失败: {failed_ids}")
			
 
				+        return results, id_to_name, failed_ids
			
--- a/trendradar/notification/__init__.py
+++ b/trendradar/notification/__init__.py
@@ -0,0 +1,81 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+通知推送模块
			
 
				+
			
 
				+提供多渠道通知推送功能，包括：
			
 
				+- 飞书、钉钉、企业微信
			
 
				+- Telegram、Slack
			
 
				+- Email、ntfy、Bark
			
 
				+
			
 
				+模块结构：
			
 
				+- push_manager: 推送记录管理
			
 
				+- formatters: 内容格式转换
			
 
				+- batch: 批次处理工具
			
 
				+- renderer: 通知内容渲染
			
 
				+- splitter: 消息分批拆分
			
 
				+- senders: 消息发送器（各渠道发送函数）
			
 
				+- dispatcher: 多账号通知调度器
			
 
				+"""
			
 
				+
			
 
				+from trendradar.notification.push_manager import PushRecordManager
			
 
				+from trendradar.notification.formatters import (
			
 
				+    strip_markdown,
			
 
				+    convert_markdown_to_mrkdwn,
			
 
				+)
			
 
				+from trendradar.notification.batch import (
			
 
				+    get_batch_header,
			
 
				+    get_max_batch_header_size,
			
 
				+    truncate_to_bytes,
			
 
				+    add_batch_headers,
			
 
				+)
			
 
				+from trendradar.notification.renderer import (
			
 
				+    render_feishu_content,
			
 
				+    render_dingtalk_content,
			
 
				+)
			
 
				+from trendradar.notification.splitter import (
			
 
				+    split_content_into_batches,
			
 
				+    DEFAULT_BATCH_SIZES,
			
 
				+)
			
 
				+from trendradar.notification.senders import (
			
 
				+    send_to_feishu,
			
 
				+    send_to_dingtalk,
			
 
				+    send_to_wework,
			
 
				+    send_to_telegram,
			
 
				+    send_to_email,
			
 
				+    send_to_ntfy,
			
 
				+    send_to_bark,
			
 
				+    send_to_slack,
			
 
				+    SMTP_CONFIGS,
			
 
				+)
			
 
				+from trendradar.notification.dispatcher import NotificationDispatcher
			
 
				+
			
 
				+__all__ = [
			
 
				+    # 推送记录管理
			
 
				+    "PushRecordManager",
			
 
				+    # 格式转换
			
 
				+    "strip_markdown",
			
 
				+    "convert_markdown_to_mrkdwn",
			
 
				+    # 批次处理
			
 
				+    "get_batch_header",
			
 
				+    "get_max_batch_header_size",
			
 
				+    "truncate_to_bytes",
			
 
				+    "add_batch_headers",
			
 
				+    # 内容渲染
			
 
				+    "render_feishu_content",
			
 
				+    "render_dingtalk_content",
			
 
				+    # 消息分批
			
 
				+    "split_content_into_batches",
			
 
				+    "DEFAULT_BATCH_SIZES",
			
 
				+    # 消息发送器
			
 
				+    "send_to_feishu",
			
 
				+    "send_to_dingtalk",
			
 
				+    "send_to_wework",
			
 
				+    "send_to_telegram",
			
 
				+    "send_to_email",
			
 
				+    "send_to_ntfy",
			
 
				+    "send_to_bark",
			
 
				+    "send_to_slack",
			
 
				+    "SMTP_CONFIGS",
			
 
				+    # 通知调度器
			
 
				+    "NotificationDispatcher",
			
 
				+]
			
--- a/trendradar/notification/batch.py
+++ b/trendradar/notification/batch.py
@@ -0,0 +1,115 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+批次处理模块
			
 
				+
			
 
				+提供消息分批发送的辅助函数
			
 
				+"""
			
 
				+
			
 
				+from typing import List
			
 
				+
			
 
				+
			
 
				+def get_batch_header(format_type: str, batch_num: int, total_batches: int) -> str:
			
 
				+    """根据 format_type 生成对应格式的批次头部
			
 
				+
			
 
				+    Args:
			
 
				+        format_type: 推送类型（telegram, slack, wework_text, bark, feishu, dingtalk, ntfy, wework）
			
 
				+        batch_num: 当前批次编号
			
 
				+        total_batches: 总批次数
			
 
				+
			
 
				+    Returns:
			
 
				+        格式化的批次头部字符串
			
 
				+    """
			
 
				+    if format_type == "telegram":
			
 
				+        return f"<b>[第 {batch_num}/{total_batches} 批次]</b>\n\n"
			
 
				+    elif format_type == "slack":
			
 
				+        return f"*[第 {batch_num}/{total_batches} 批次]*\n\n"
			
 
				+    elif format_type in ("wework_text", "bark"):
			
 
				+        # 企业微信文本模式和 Bark 使用纯文本格式
			
 
				+        return f"[第 {batch_num}/{total_batches} 批次]\n\n"
			
 
				+    else:
			
 
				+        # 飞书、钉钉、ntfy、企业微信 markdown 模式
			
 
				+        return f"**[第 {batch_num}/{total_batches} 批次]**\n\n"
			
 
				+
			
 
				+
			
 
				+def get_max_batch_header_size(format_type: str) -> int:
			
 
				+    """估算批次头部的最大字节数（假设最多 99 批次）
			
 
				+
			
 
				+    用于在分批时预留空间，避免事后截断破坏内容完整性。
			
 
				+
			
 
				+    Args:
			
 
				+        format_type: 推送类型
			
 
				+
			
 
				+    Returns:
			
 
				+        最大头部字节数
			
 
				+    """
			
 
				+    # 生成最坏情况的头部（99/99 批次）
			
 
				+    max_header = get_batch_header(format_type, 99, 99)
			
 
				+    return len(max_header.encode("utf-8"))
			
 
				+
			
 
				+
			
 
				+def truncate_to_bytes(text: str, max_bytes: int) -> str:
			
 
				+    """安全截断字符串到指定字节数，避免截断多字节字符
			
 
				+
			
 
				+    Args:
			
 
				+        text: 要截断的文本
			
 
				+        max_bytes: 最大字节数
			
 
				+
			
 
				+    Returns:
			
 
				+        截断后的文本
			
 
				+    """
			
 
				+    text_bytes = text.encode("utf-8")
			
 
				+    if len(text_bytes) <= max_bytes:
			
 
				+        return text
			
 
				+
			
 
				+    # 截断到指定字节数
			
 
				+    truncated = text_bytes[:max_bytes]
			
 
				+
			
 
				+    # 处理可能的不完整 UTF-8 字符
			
 
				+    for i in range(min(4, len(truncated))):
			
 
				+        try:
			
 
				+            return truncated[: len(truncated) - i].decode("utf-8")
			
 
				+        except UnicodeDecodeError:
			
 
				+            continue
			
 
				+
			
 
				+    # 极端情况：返回空字符串
			
 
				+    return ""
			
 
				+
			
 
				+
			
 
				+def add_batch_headers(
			
 
				+    batches: List[str], format_type: str, max_bytes: int
			
 
				+) -> List[str]:
			
 
				+    """为批次添加头部，动态计算确保总大小不超过限制
			
 
				+
			
 
				+    Args:
			
 
				+        batches: 原始批次列表
			
 
				+        format_type: 推送类型（bark, telegram, feishu 等）
			
 
				+        max_bytes: 该推送类型的最大字节限制
			
 
				+
			
 
				+    Returns:
			
 
				+        添加头部后的批次列表
			
 
				+    """
			
 
				+    if len(batches) <= 1:
			
 
				+        return batches
			
 
				+
			
 
				+    total = len(batches)
			
 
				+    result = []
			
 
				+
			
 
				+    for i, content in enumerate(batches, 1):
			
 
				+        # 生成批次头部
			
 
				+        header = get_batch_header(format_type, i, total)
			
 
				+        header_size = len(header.encode("utf-8"))
			
 
				+
			
 
				+        # 动态计算允许的最大内容大小
			
 
				+        max_content_size = max_bytes - header_size
			
 
				+        content_size = len(content.encode("utf-8"))
			
 
				+
			
 
				+        # 如果超出，截断到安全大小
			
 
				+        if content_size > max_content_size:
			
 
				+            print(
			
 
				+                f"警告：{format_type} 第 {i}/{total} 批次内容({content_size}字节) + 头部({header_size}字节) 超出限制({max_bytes}字节)，截断到 {max_content_size} 字节"
			
 
				+            )
			
 
				+            content = truncate_to_bytes(content, max_content_size)
			
 
				+
			
 
				+        result.append(header + content)
			
 
				+
			
 
				+    return result
			
--- a/trendradar/notification/dispatcher.py
+++ b/trendradar/notification/dispatcher.py
@@ -0,0 +1,420 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+通知调度器模块
			
 
				+
			
 
				+提供统一的通知分发接口。
			
 
				+支持所有通知渠道的多账号配置，使用 `;` 分隔多个账号。
			
 
				+
			
 
				+使用示例:
			
 
				+    dispatcher = NotificationDispatcher(config, get_time_func, split_content_func)
			
 
				+    results = dispatcher.dispatch_all(report_data, report_type, ...)
			
 
				+"""
			
 
				+
			
 
				+from typing import Any, Callable, Dict, List, Optional
			
 
				+
			
 
				+from trendradar.core.config import (
			
 
				+    get_account_at_index,
			
 
				+    limit_accounts,
			
 
				+    parse_multi_account_config,
			
 
				+    validate_paired_configs,
			
 
				+)
			
 
				+
			
 
				+from .senders import (
			
 
				+    send_to_bark,
			
 
				+    send_to_dingtalk,
			
 
				+    send_to_email,
			
 
				+    send_to_feishu,
			
 
				+    send_to_ntfy,
			
 
				+    send_to_slack,
			
 
				+    send_to_telegram,
			
 
				+    send_to_wework,
			
 
				+)
			
 
				+
			
 
				+
			
 
				+class NotificationDispatcher:
			
 
				+    """
			
 
				+    统一的多账号通知调度器
			
 
				+
			
 
				+    将多账号发送逻辑封装，提供简洁的 dispatch_all 接口。
			
 
				+    内部处理账号解析、数量限制、配对验证等逻辑。
			
 
				+    """
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        config: Dict[str, Any],
			
 
				+        get_time_func: Callable,
			
 
				+        split_content_func: Callable,
			
 
				+    ):
			
 
				+        """
			
 
				+        初始化通知调度器
			
 
				+
			
 
				+        Args:
			
 
				+            config: 完整的配置字典，包含所有通知渠道的配置
			
 
				+            get_time_func: 获取当前时间的函数
			
 
				+            split_content_func: 内容分批函数
			
 
				+        """
			
 
				+        self.config = config
			
 
				+        self.get_time_func = get_time_func
			
 
				+        self.split_content_func = split_content_func
			
 
				+        self.max_accounts = config.get("MAX_ACCOUNTS_PER_CHANNEL", 3)
			
 
				+
			
 
				+    def dispatch_all(
			
 
				+        self,
			
 
				+        report_data: Dict,
			
 
				+        report_type: str,
			
 
				+        update_info: Optional[Dict] = None,
			
 
				+        proxy_url: Optional[str] = None,
			
 
				+        mode: str = "daily",
			
 
				+        html_file_path: Optional[str] = None,
			
 
				+    ) -> Dict[str, bool]:
			
 
				+        """
			
 
				+        分发通知到所有已配置的渠道
			
 
				+
			
 
				+        Args:
			
 
				+            report_data: 报告数据（由 prepare_report_data 生成）
			
 
				+            report_type: 报告类型（如 "当日汇总"、"实时增量"）
			
 
				+            update_info: 版本更新信息（可选）
			
 
				+            proxy_url: 代理 URL（可选）
			
 
				+            mode: 报告模式 (daily/current/incremental)
			
 
				+            html_file_path: HTML 报告文件路径（邮件使用）
			
 
				+
			
 
				+        Returns:
			
 
				+            Dict[str, bool]: 每个渠道的发送结果，key 为渠道名，value 为是否成功
			
 
				+        """
			
 
				+        results = {}
			
 
				+
			
 
				+        # 飞书
			
 
				+        if self.config.get("FEISHU_WEBHOOK_URL"):
			
 
				+            results["feishu"] = self._send_feishu(
			
 
				+                report_data, report_type, update_info, proxy_url, mode
			
 
				+            )
			
 
				+
			
 
				+        # 钉钉
			
 
				+        if self.config.get("DINGTALK_WEBHOOK_URL"):
			
 
				+            results["dingtalk"] = self._send_dingtalk(
			
 
				+                report_data, report_type, update_info, proxy_url, mode
			
 
				+            )
			
 
				+
			
 
				+        # 企业微信
			
 
				+        if self.config.get("WEWORK_WEBHOOK_URL"):
			
 
				+            results["wework"] = self._send_wework(
			
 
				+                report_data, report_type, update_info, proxy_url, mode
			
 
				+            )
			
 
				+
			
 
				+        # Telegram（需要配对验证）
			
 
				+        if self.config.get("TELEGRAM_BOT_TOKEN") and self.config.get("TELEGRAM_CHAT_ID"):
			
 
				+            results["telegram"] = self._send_telegram(
			
 
				+                report_data, report_type, update_info, proxy_url, mode
			
 
				+            )
			
 
				+
			
 
				+        # ntfy（需要配对验证）
			
 
				+        if self.config.get("NTFY_SERVER_URL") and self.config.get("NTFY_TOPIC"):
			
 
				+            results["ntfy"] = self._send_ntfy(
			
 
				+                report_data, report_type, update_info, proxy_url, mode
			
 
				+            )
			
 
				+
			
 
				+        # Bark
			
 
				+        if self.config.get("BARK_URL"):
			
 
				+            results["bark"] = self._send_bark(
			
 
				+                report_data, report_type, update_info, proxy_url, mode
			
 
				+            )
			
 
				+
			
 
				+        # Slack
			
 
				+        if self.config.get("SLACK_WEBHOOK_URL"):
			
 
				+            results["slack"] = self._send_slack(
			
 
				+                report_data, report_type, update_info, proxy_url, mode
			
 
				+            )
			
 
				+
			
 
				+        # 邮件（保持原有逻辑，已支持多收件人）
			
 
				+        if (
			
 
				+            self.config.get("EMAIL_FROM")
			
 
				+            and self.config.get("EMAIL_PASSWORD")
			
 
				+            and self.config.get("EMAIL_TO")
			
 
				+        ):
			
 
				+            results["email"] = self._send_email(report_type, html_file_path)
			
 
				+
			
 
				+        return results
			
 
				+
			
 
				+    def _send_to_multi_accounts(
			
 
				+        self,
			
 
				+        channel_name: str,
			
 
				+        config_value: str,
			
 
				+        send_func: Callable[..., bool],
			
 
				+        **kwargs,
			
 
				+    ) -> bool:
			
 
				+        """
			
 
				+        通用多账号发送逻辑
			
 
				+
			
 
				+        Args:
			
 
				+            channel_name: 渠道名称（用于日志和账号数量限制提示）
			
 
				+            config_value: 配置值（可能包含多个账号，用 ; 分隔）
			
 
				+            send_func: 发送函数，签名为 (account, account_label=..., **kwargs) -> bool
			
 
				+            **kwargs: 传递给发送函数的其他参数
			
 
				+
			
 
				+        Returns:
			
 
				+            bool: 任一账号发送成功则返回 True
			
 
				+        """
			
 
				+        accounts = parse_multi_account_config(config_value)
			
 
				+        if not accounts:
			
 
				+            return False
			
 
				+
			
 
				+        accounts = limit_accounts(accounts, self.max_accounts, channel_name)
			
 
				+        results = []
			
 
				+
			
 
				+        for i, account in enumerate(accounts):
			
 
				+            if account:
			
 
				+                account_label = f"账号{i+1}" if len(accounts) > 1 else ""
			
 
				+                result = send_func(account, account_label=account_label, **kwargs)
			
 
				+                results.append(result)
			
 
				+
			
 
				+        return any(results) if results else False
			
 
				+
			
 
				+    def _send_feishu(
			
 
				+        self,
			
 
				+        report_data: Dict,
			
 
				+        report_type: str,
			
 
				+        update_info: Optional[Dict],
			
 
				+        proxy_url: Optional[str],
			
 
				+        mode: str,
			
 
				+    ) -> bool:
			
 
				+        """发送到飞书（多账号）"""
			
 
				+        return self._send_to_multi_accounts(
			
 
				+            channel_name="飞书",
			
 
				+            config_value=self.config["FEISHU_WEBHOOK_URL"],
			
 
				+            send_func=lambda url, account_label: send_to_feishu(
			
 
				+                webhook_url=url,
			
 
				+                report_data=report_data,
			
 
				+                report_type=report_type,
			
 
				+                update_info=update_info,
			
 
				+                proxy_url=proxy_url,
			
 
				+                mode=mode,
			
 
				+                account_label=account_label,
			
 
				+                batch_size=self.config.get("FEISHU_BATCH_SIZE", 29000),
			
 
				+                batch_interval=self.config.get("BATCH_SEND_INTERVAL", 1.0),
			
 
				+                split_content_func=self.split_content_func,
			
 
				+                get_time_func=self.get_time_func,
			
 
				+            ),
			
 
				+        )
			
 
				+
			
 
				+    def _send_dingtalk(
			
 
				+        self,
			
 
				+        report_data: Dict,
			
 
				+        report_type: str,
			
 
				+        update_info: Optional[Dict],
			
 
				+        proxy_url: Optional[str],
			
 
				+        mode: str,
			
 
				+    ) -> bool:
			
 
				+        """发送到钉钉（多账号）"""
			
 
				+        return self._send_to_multi_accounts(
			
 
				+            channel_name="钉钉",
			
 
				+            config_value=self.config["DINGTALK_WEBHOOK_URL"],
			
 
				+            send_func=lambda url, account_label: send_to_dingtalk(
			
 
				+                webhook_url=url,
			
 
				+                report_data=report_data,
			
 
				+                report_type=report_type,
			
 
				+                update_info=update_info,
			
 
				+                proxy_url=proxy_url,
			
 
				+                mode=mode,
			
 
				+                account_label=account_label,
			
 
				+                batch_size=self.config.get("DINGTALK_BATCH_SIZE", 20000),
			
 
				+                batch_interval=self.config.get("BATCH_SEND_INTERVAL", 1.0),
			
 
				+                split_content_func=self.split_content_func,
			
 
				+            ),
			
 
				+        )
			
 
				+
			
 
				+    def _send_wework(
			
 
				+        self,
			
 
				+        report_data: Dict,
			
 
				+        report_type: str,
			
 
				+        update_info: Optional[Dict],
			
 
				+        proxy_url: Optional[str],
			
 
				+        mode: str,
			
 
				+    ) -> bool:
			
 
				+        """发送到企业微信（多账号）"""
			
 
				+        return self._send_to_multi_accounts(
			
 
				+            channel_name="企业微信",
			
 
				+            config_value=self.config["WEWORK_WEBHOOK_URL"],
			
 
				+            send_func=lambda url, account_label: send_to_wework(
			
 
				+                webhook_url=url,
			
 
				+                report_data=report_data,
			
 
				+                report_type=report_type,
			
 
				+                update_info=update_info,
			
 
				+                proxy_url=proxy_url,
			
 
				+                mode=mode,
			
 
				+                account_label=account_label,
			
 
				+                batch_size=self.config.get("MESSAGE_BATCH_SIZE", 4000),
			
 
				+                batch_interval=self.config.get("BATCH_SEND_INTERVAL", 1.0),
			
 
				+                msg_type=self.config.get("WEWORK_MSG_TYPE", "markdown"),
			
 
				+                split_content_func=self.split_content_func,
			
 
				+            ),
			
 
				+        )
			
 
				+
			
 
				+    def _send_telegram(
			
 
				+        self,
			
 
				+        report_data: Dict,
			
 
				+        report_type: str,
			
 
				+        update_info: Optional[Dict],
			
 
				+        proxy_url: Optional[str],
			
 
				+        mode: str,
			
 
				+    ) -> bool:
			
 
				+        """发送到 Telegram（多账号，需验证 token 和 chat_id 配对）"""
			
 
				+        telegram_tokens = parse_multi_account_config(self.config["TELEGRAM_BOT_TOKEN"])
			
 
				+        telegram_chat_ids = parse_multi_account_config(self.config["TELEGRAM_CHAT_ID"])
			
 
				+
			
 
				+        if not telegram_tokens or not telegram_chat_ids:
			
 
				+            return False
			
 
				+
			
 
				+        # 验证配对
			
 
				+        valid, count = validate_paired_configs(
			
 
				+            {"bot_token": telegram_tokens, "chat_id": telegram_chat_ids},
			
 
				+            "Telegram",
			
 
				+            required_keys=["bot_token", "chat_id"],
			
 
				+        )
			
 
				+        if not valid or count == 0:
			
 
				+            return False
			
 
				+
			
 
				+        # 限制账号数量
			
 
				+        telegram_tokens = limit_accounts(telegram_tokens, self.max_accounts, "Telegram")
			
 
				+        telegram_chat_ids = telegram_chat_ids[: len(telegram_tokens)]
			
 
				+
			
 
				+        results = []
			
 
				+        for i in range(len(telegram_tokens)):
			
 
				+            token = telegram_tokens[i]
			
 
				+            chat_id = telegram_chat_ids[i]
			
 
				+            if token and chat_id:
			
 
				+                account_label = f"账号{i+1}" if len(telegram_tokens) > 1 else ""
			
 
				+                result = send_to_telegram(
			
 
				+                    bot_token=token,
			
 
				+                    chat_id=chat_id,
			
 
				+                    report_data=report_data,
			
 
				+                    report_type=report_type,
			
 
				+                    update_info=update_info,
			
 
				+                    proxy_url=proxy_url,
			
 
				+                    mode=mode,
			
 
				+                    account_label=account_label,
			
 
				+                    batch_size=self.config.get("MESSAGE_BATCH_SIZE", 4000),
			
 
				+                    batch_interval=self.config.get("BATCH_SEND_INTERVAL", 1.0),
			
 
				+                    split_content_func=self.split_content_func,
			
 
				+                )
			
 
				+                results.append(result)
			
 
				+
			
 
				+        return any(results) if results else False
			
 
				+
			
 
				+    def _send_ntfy(
			
 
				+        self,
			
 
				+        report_data: Dict,
			
 
				+        report_type: str,
			
 
				+        update_info: Optional[Dict],
			
 
				+        proxy_url: Optional[str],
			
 
				+        mode: str,
			
 
				+    ) -> bool:
			
 
				+        """发送到 ntfy（多账号，需验证 topic 和 token 配对）"""
			
 
				+        ntfy_server_url = self.config["NTFY_SERVER_URL"]
			
 
				+        ntfy_topics = parse_multi_account_config(self.config["NTFY_TOPIC"])
			
 
				+        ntfy_tokens = parse_multi_account_config(self.config.get("NTFY_TOKEN", ""))
			
 
				+
			
 
				+        if not ntfy_server_url or not ntfy_topics:
			
 
				+            return False
			
 
				+
			
 
				+        # 验证 token 和 topic 数量一致（如果配置了 token）
			
 
				+        if ntfy_tokens and len(ntfy_tokens) != len(ntfy_topics):
			
 
				+            print(
			
 
				+                f"❌ ntfy 配置错误：topic 数量({len(ntfy_topics)})与 token 数量({len(ntfy_tokens)})不一致，跳过 ntfy 推送"
			
 
				+            )
			
 
				+            return False
			
 
				+
			
 
				+        # 限制账号数量
			
 
				+        ntfy_topics = limit_accounts(ntfy_topics, self.max_accounts, "ntfy")
			
 
				+        if ntfy_tokens:
			
 
				+            ntfy_tokens = ntfy_tokens[: len(ntfy_topics)]
			
 
				+
			
 
				+        results = []
			
 
				+        for i, topic in enumerate(ntfy_topics):
			
 
				+            if topic:
			
 
				+                token = get_account_at_index(ntfy_tokens, i, "") if ntfy_tokens else ""
			
 
				+                account_label = f"账号{i+1}" if len(ntfy_topics) > 1 else ""
			
 
				+                result = send_to_ntfy(
			
 
				+                    server_url=ntfy_server_url,
			
 
				+                    topic=topic,
			
 
				+                    token=token,
			
 
				+                    report_data=report_data,
			
 
				+                    report_type=report_type,
			
 
				+                    update_info=update_info,
			
 
				+                    proxy_url=proxy_url,
			
 
				+                    mode=mode,
			
 
				+                    account_label=account_label,
			
 
				+                    batch_size=3800,
			
 
				+                    split_content_func=self.split_content_func,
			
 
				+                )
			
 
				+                results.append(result)
			
 
				+
			
 
				+        return any(results) if results else False
			
 
				+
			
 
				+    def _send_bark(
			
 
				+        self,
			
 
				+        report_data: Dict,
			
 
				+        report_type: str,
			
 
				+        update_info: Optional[Dict],
			
 
				+        proxy_url: Optional[str],
			
 
				+        mode: str,
			
 
				+    ) -> bool:
			
 
				+        """发送到 Bark（多账号）"""
			
 
				+        return self._send_to_multi_accounts(
			
 
				+            channel_name="Bark",
			
 
				+            config_value=self.config["BARK_URL"],
			
 
				+            send_func=lambda url, account_label: send_to_bark(
			
 
				+                bark_url=url,
			
 
				+                report_data=report_data,
			
 
				+                report_type=report_type,
			
 
				+                update_info=update_info,
			
 
				+                proxy_url=proxy_url,
			
 
				+                mode=mode,
			
 
				+                account_label=account_label,
			
 
				+                batch_size=self.config.get("BARK_BATCH_SIZE", 3600),
			
 
				+                batch_interval=self.config.get("BATCH_SEND_INTERVAL", 1.0),
			
 
				+                split_content_func=self.split_content_func,
			
 
				+            ),
			
 
				+        )
			
 
				+
			
 
				+    def _send_slack(
			
 
				+        self,
			
 
				+        report_data: Dict,
			
 
				+        report_type: str,
			
 
				+        update_info: Optional[Dict],
			
 
				+        proxy_url: Optional[str],
			
 
				+        mode: str,
			
 
				+    ) -> bool:
			
 
				+        """发送到 Slack（多账号）"""
			
 
				+        return self._send_to_multi_accounts(
			
 
				+            channel_name="Slack",
			
 
				+            config_value=self.config["SLACK_WEBHOOK_URL"],
			
 
				+            send_func=lambda url, account_label: send_to_slack(
			
 
				+                webhook_url=url,
			
 
				+                report_data=report_data,
			
 
				+                report_type=report_type,
			
 
				+                update_info=update_info,
			
 
				+                proxy_url=proxy_url,
			
 
				+                mode=mode,
			
 
				+                account_label=account_label,
			
 
				+                batch_size=self.config.get("SLACK_BATCH_SIZE", 4000),
			
 
				+                batch_interval=self.config.get("BATCH_SEND_INTERVAL", 1.0),
			
 
				+                split_content_func=self.split_content_func,
			
 
				+            ),
			
 
				+        )
			
 
				+
			
 
				+    def _send_email(
			
 
				+        self,
			
 
				+        report_type: str,
			
 
				+        html_file_path: Optional[str],
			
 
				+    ) -> bool:
			
 
				+        """发送邮件（保持原有逻辑，已支持多收件人）"""
			
 
				+        return send_to_email(
			
 
				+            from_email=self.config["EMAIL_FROM"],
			
 
				+            password=self.config["EMAIL_PASSWORD"],
			
 
				+            to_email=self.config["EMAIL_TO"],
			
 
				+            report_type=report_type,
			
 
				+            html_file_path=html_file_path,
			
 
				+            custom_smtp_server=self.config.get("EMAIL_SMTP_SERVER", ""),
			
 
				+            custom_smtp_port=self.config.get("EMAIL_SMTP_PORT", ""),
			
 
				+            get_time_func=self.get_time_func,
			
 
				+        )
			
--- a/trendradar/notification/formatters.py
+++ b/trendradar/notification/formatters.py
@@ -0,0 +1,80 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+通知内容格式转换模块
			
 
				+
			
 
				+提供不同推送平台间的格式转换功能
			
 
				+"""
			
 
				+
			
 
				+import re
			
 
				+
			
 
				+
			
 
				+def strip_markdown(text: str) -> str:
			
 
				+    """去除文本中的 markdown 语法格式，用于个人微信推送
			
 
				+
			
 
				+    Args:
			
 
				+        text: 包含 markdown 格式的文本
			
 
				+
			
 
				+    Returns:
			
 
				+        纯文本内容
			
 
				+    """
			
 
				+    # 去除粗体 **text** 或 __text__
			
 
				+    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
			
 
				+    text = re.sub(r'__(.+?)__', r'\1', text)
			
 
				+
			
 
				+    # 去除斜体 *text* 或 _text_
			
 
				+    text = re.sub(r'\*(.+?)\*', r'\1', text)
			
 
				+    text = re.sub(r'_(.+?)_', r'\1', text)
			
 
				+
			
 
				+    # 去除删除线 ~~text~~
			
 
				+    text = re.sub(r'~~(.+?)~~', r'\1', text)
			
 
				+
			
 
				+    # 转换链接 [text](url) -> text url（保留 URL）
			
 
				+    text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'\1 \2', text)
			
 
				+
			
 
				+    # 去除图片 ![alt](url) -> alt
			
 
				+    text = re.sub(r'!\[(.+?)\]\(.+?\)', r'\1', text)
			
 
				+
			
 
				+    # 去除行内代码 `code`
			
 
				+    text = re.sub(r'`(.+?)`', r'\1', text)
			
 
				+
			
 
				+    # 去除引用符号 >
			
 
				+    text = re.sub(r'^>\s*', '', text, flags=re.MULTILINE)
			
 
				+
			
 
				+    # 去除标题符号 # ## ### 等
			
 
				+    text = re.sub(r'^#+\s*', '', text, flags=re.MULTILINE)
			
 
				+
			
 
				+    # 去除水平分割线 --- 或 ***
			
 
				+    text = re.sub(r'^[\-\*]{3,}\s*$', '', text, flags=re.MULTILINE)
			
 
				+
			
 
				+    # 去除 HTML 标签 <font color='xxx'>text</font> -> text
			
 
				+    text = re.sub(r'<font[^>]*>(.+?)</font>', r'\1', text)
			
 
				+    text = re.sub(r'<[^>]+>', '', text)
			
 
				+
			
 
				+    # 清理多余的空行（保留最多两个连续空行）
			
 
				+    text = re.sub(r'\n{3,}', '\n\n', text)
			
 
				+
			
 
				+    return text.strip()
			
 
				+
			
 
				+
			
 
				+def convert_markdown_to_mrkdwn(content: str) -> str:
			
 
				+    """
			
 
				+    将标准 Markdown 转换为 Slack 的 mrkdwn 格式
			
 
				+
			
 
				+    转换规则：
			
 
				+    - **粗体** → *粗体*
			
 
				+    - [文本](url) → <url|文本>
			
 
				+    - 保留其他格式（代码块、列表等）
			
 
				+
			
 
				+    Args:
			
 
				+        content: Markdown 格式的内容
			
 
				+
			
 
				+    Returns:
			
 
				+        Slack mrkdwn 格式的内容
			
 
				+    """
			
 
				+    # 1. 转换链接格式: [文本](url) → <url|文本>
			
 
				+    content = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', r'<\2|\1>', content)
			
 
				+
			
 
				+    # 2. 转换粗体: **文本** → *文本*
			
 
				+    content = re.sub(r'\*\*([^*]+)\*\*', r'*\1*', content)
			
 
				+
			
 
				+    return content
			
--- a/trendradar/notification/push_manager.py
+++ b/trendradar/notification/push_manager.py
@@ -0,0 +1,109 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+推送记录管理模块
			
 
				+
			
 
				+管理推送记录，支持每日只推送一次和时间窗口控制
			
 
				+通过 storage_backend 统一存储，支持本地 SQLite 和远程云存储
			
 
				+"""
			
 
				+
			
 
				+from datetime import datetime
			
 
				+from typing import Callable, Optional, Any
			
 
				+
			
 
				+import pytz
			
 
				+
			
 
				+
			
 
				+class PushRecordManager:
			
 
				+    """
			
 
				+    推送记录管理器
			
 
				+
			
 
				+    通过 storage_backend 统一管理推送记录：
			
 
				+    - 本地环境：使用 LocalStorageBackend，数据存储在本地 SQLite
			
 
				+    - GitHub Actions：使用 RemoteStorageBackend，数据存储在云端
			
 
				+
			
 
				+    这样 once_per_day 功能在 GitHub Actions 上也能正常工作。
			
 
				+    """
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        storage_backend: Any,
			
 
				+        get_time_func: Optional[Callable[[], datetime]] = None,
			
 
				+    ):
			
 
				+        """
			
 
				+        初始化推送记录管理器
			
 
				+
			
 
				+        Args:
			
 
				+            storage_backend: 存储后端实例（LocalStorageBackend 或 RemoteStorageBackend）
			
 
				+            get_time_func: 获取当前时间的函数（应使用配置的时区）
			
 
				+        """
			
 
				+        self.storage_backend = storage_backend
			
 
				+        self.get_time = get_time_func or self._default_get_time
			
 
				+
			
 
				+        print(f"[推送记录] 使用 {storage_backend.backend_name} 存储后端")
			
 
				+
			
 
				+    def _default_get_time(self) -> datetime:
			
 
				+        """默认时间获取函数（UTC+8）"""
			
 
				+        return datetime.now(pytz.timezone("Asia/Shanghai"))
			
 
				+
			
 
				+    def has_pushed_today(self) -> bool:
			
 
				+        """
			
 
				+        检查今天是否已经推送过
			
 
				+
			
 
				+        Returns:
			
 
				+            是否已推送
			
 
				+        """
			
 
				+        return self.storage_backend.has_pushed_today()
			
 
				+
			
 
				+    def record_push(self, report_type: str) -> bool:
			
 
				+        """
			
 
				+        记录推送
			
 
				+
			
 
				+        Args:
			
 
				+            report_type: 报告类型
			
 
				+
			
 
				+        Returns:
			
 
				+            是否记录成功
			
 
				+        """
			
 
				+        return self.storage_backend.record_push(report_type)
			
 
				+
			
 
				+    def is_in_time_range(self, start_time: str, end_time: str) -> bool:
			
 
				+        """
			
 
				+        检查当前时间是否在指定时间范围内
			
 
				+
			
 
				+        Args:
			
 
				+            start_time: 开始时间（格式：HH:MM）
			
 
				+            end_time: 结束时间（格式：HH:MM）
			
 
				+
			
 
				+        Returns:
			
 
				+            是否在时间范围内
			
 
				+        """
			
 
				+        now = self.get_time()
			
 
				+        current_time = now.strftime("%H:%M")
			
 
				+
			
 
				+        def normalize_time(time_str: str) -> str:
			
 
				+            """将时间字符串标准化为 HH:MM 格式"""
			
 
				+            try:
			
 
				+                parts = time_str.strip().split(":")
			
 
				+                if len(parts) != 2:
			
 
				+                    raise ValueError(f"时间格式错误: {time_str}")
			
 
				+
			
 
				+                hour = int(parts[0])
			
 
				+                minute = int(parts[1])
			
 
				+
			
 
				+                if not (0 <= hour <= 23 and 0 <= minute <= 59):
			
 
				+                    raise ValueError(f"时间范围错误: {time_str}")
			
 
				+
			
 
				+                return f"{hour:02d}:{minute:02d}"
			
 
				+            except Exception as e:
			
 
				+                print(f"时间格式化错误 '{time_str}': {e}")
			
 
				+                return time_str
			
 
				+
			
 
				+        normalized_start = normalize_time(start_time)
			
 
				+        normalized_end = normalize_time(end_time)
			
 
				+        normalized_current = normalize_time(current_time)
			
 
				+
			
 
				+        result = normalized_start <= normalized_current <= normalized_end
			
 
				+
			
 
				+        if not result:
			
 
				+            print(f"时间窗口判断：当前 {normalized_current}，窗口 {normalized_start}-{normalized_end}")
			
 
				+
			
 
				+        return result
			
--- a/trendradar/notification/renderer.py
+++ b/trendradar/notification/renderer.py
@@ -0,0 +1,260 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+通知内容渲染模块
			
 
				+
			
 
				+提供多平台通知内容渲染功能，生成格式化的推送消息
			
 
				+"""
			
 
				+
			
 
				+from datetime import datetime
			
 
				+from typing import Dict, List, Optional, Callable
			
 
				+
			
 
				+from trendradar.report.formatter import format_title_for_platform
			
 
				+
			
 
				+
			
 
				+def render_feishu_content(
			
 
				+    report_data: Dict,
			
 
				+    update_info: Optional[Dict] = None,
			
 
				+    mode: str = "daily",
			
 
				+    separator: str = "---",
			
 
				+    reverse_content_order: bool = False,
			
 
				+    get_time_func: Optional[Callable[[], datetime]] = None,
			
 
				+) -> str:
			
 
				+    """渲染飞书通知内容
			
 
				+
			
 
				+    Args:
			
 
				+        report_data: 报告数据字典，包含 stats, new_titles, failed_ids, total_new_count
			
 
				+        update_info: 版本更新信息（可选）
			
 
				+        mode: 报告模式 ("daily", "incremental", "current")
			
 
				+        separator: 内容分隔符
			
 
				+        reverse_content_order: 是否反转内容顺序（新增在前）
			
 
				+        get_time_func: 获取当前时间的函数（可选，默认使用 datetime.now()）
			
 
				+
			
 
				+    Returns:
			
 
				+        格式化的飞书消息内容
			
 
				+    """
			
 
				+    # 生成热点词汇统计部分
			
 
				+    stats_content = ""
			
 
				+    if report_data["stats"]:
			
 
				+        stats_content += "📊 **热点词汇统计**\n\n"
			
 
				+
			
 
				+        total_count = len(report_data["stats"])
			
 
				+
			
 
				+        for i, stat in enumerate(report_data["stats"]):
			
 
				+            word = stat["word"]
			
 
				+            count = stat["count"]
			
 
				+
			
 
				+            sequence_display = f"<font color='grey'>[{i + 1}/{total_count}]</font>"
			
 
				+
			
 
				+            if count >= 10:
			
 
				+                stats_content += f"🔥 {sequence_display} **{word}** : <font color='red'>{count}</font> 条\n\n"
			
 
				+            elif count >= 5:
			
 
				+                stats_content += f"📈 {sequence_display} **{word}** : <font color='orange'>{count}</font> 条\n\n"
			
 
				+            else:
			
 
				+                stats_content += f"📌 {sequence_display} **{word}** : {count} 条\n\n"
			
 
				+
			
 
				+            for j, title_data in enumerate(stat["titles"], 1):
			
 
				+                formatted_title = format_title_for_platform(
			
 
				+                    "feishu", title_data, show_source=True
			
 
				+                )
			
 
				+                stats_content += f"  {j}. {formatted_title}\n"
			
 
				+
			
 
				+                if j < len(stat["titles"]):
			
 
				+                    stats_content += "\n"
			
 
				+
			
 
				+            if i < len(report_data["stats"]) - 1:
			
 
				+                stats_content += f"\n{separator}\n\n"
			
 
				+
			
 
				+    # 生成新增新闻部分
			
 
				+    new_titles_content = ""
			
 
				+    if report_data["new_titles"]:
			
 
				+        new_titles_content += (
			
 
				+            f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
			
 
				+        )
			
 
				+
			
 
				+        for source_data in report_data["new_titles"]:
			
 
				+            new_titles_content += (
			
 
				+                f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n"
			
 
				+            )
			
 
				+
			
 
				+            for j, title_data in enumerate(source_data["titles"], 1):
			
 
				+                title_data_copy = title_data.copy()
			
 
				+                title_data_copy["is_new"] = False
			
 
				+                formatted_title = format_title_for_platform(
			
 
				+                    "feishu", title_data_copy, show_source=False
			
 
				+                )
			
 
				+                new_titles_content += f"  {j}. {formatted_title}\n"
			
 
				+
			
 
				+            new_titles_content += "\n"
			
 
				+
			
 
				+    # 根据配置决定内容顺序
			
 
				+    text_content = ""
			
 
				+    if reverse_content_order:
			
 
				+        # 新增热点在前，热点词汇统计在后
			
 
				+        if new_titles_content:
			
 
				+            text_content += new_titles_content
			
 
				+            if stats_content:
			
 
				+                text_content += f"\n{separator}\n\n"
			
 
				+        if stats_content:
			
 
				+            text_content += stats_content
			
 
				+    else:
			
 
				+        # 默认：热点词汇统计在前，新增热点在后
			
 
				+        if stats_content:
			
 
				+            text_content += stats_content
			
 
				+            if new_titles_content:
			
 
				+                text_content += f"\n{separator}\n\n"
			
 
				+        if new_titles_content:
			
 
				+            text_content += new_titles_content
			
 
				+
			
 
				+    if not text_content:
			
 
				+        if mode == "incremental":
			
 
				+            mode_text = "增量模式下暂无新增匹配的热点词汇"
			
 
				+        elif mode == "current":
			
 
				+            mode_text = "当前榜单模式下暂无匹配的热点词汇"
			
 
				+        else:
			
 
				+            mode_text = "暂无匹配的热点词汇"
			
 
				+        text_content = f"📭 {mode_text}\n\n"
			
 
				+
			
 
				+    if report_data["failed_ids"]:
			
 
				+        if text_content and "暂无匹配" not in text_content:
			
 
				+            text_content += f"\n{separator}\n\n"
			
 
				+
			
 
				+        text_content += "⚠️ **数据获取失败的平台：**\n\n"
			
 
				+        for i, id_value in enumerate(report_data["failed_ids"], 1):
			
 
				+            text_content += f"  • <font color='red'>{id_value}</font>\n"
			
 
				+
			
 
				+    # 获取当前时间
			
 
				+    now = get_time_func() if get_time_func else datetime.now()
			
 
				+    text_content += (
			
 
				+        f"\n\n<font color='grey'>更新时间：{now.strftime('%Y-%m-%d %H:%M:%S')}</font>"
			
 
				+    )
			
 
				+
			
 
				+    if update_info:
			
 
				+        text_content += f"\n<font color='grey'>TrendRadar 发现新版本 {update_info['remote_version']}，当前 {update_info['current_version']}</font>"
			
 
				+
			
 
				+    return text_content
			
 
				+
			
 
				+
			
 
				+def render_dingtalk_content(
			
 
				+    report_data: Dict,
			
 
				+    update_info: Optional[Dict] = None,
			
 
				+    mode: str = "daily",
			
 
				+    reverse_content_order: bool = False,
			
 
				+    get_time_func: Optional[Callable[[], datetime]] = None,
			
 
				+) -> str:
			
 
				+    """渲染钉钉通知内容
			
 
				+
			
 
				+    Args:
			
 
				+        report_data: 报告数据字典，包含 stats, new_titles, failed_ids, total_new_count
			
 
				+        update_info: 版本更新信息（可选）
			
 
				+        mode: 报告模式 ("daily", "incremental", "current")
			
 
				+        reverse_content_order: 是否反转内容顺序（新增在前）
			
 
				+        get_time_func: 获取当前时间的函数（可选，默认使用 datetime.now()）
			
 
				+
			
 
				+    Returns:
			
 
				+        格式化的钉钉消息内容
			
 
				+    """
			
 
				+    total_titles = sum(
			
 
				+        len(stat["titles"]) for stat in report_data["stats"] if stat["count"] > 0
			
 
				+    )
			
 
				+    now = get_time_func() if get_time_func else datetime.now()
			
 
				+
			
 
				+    # 头部信息
			
 
				+    header_content = f"**总新闻数：** {total_titles}\n\n"
			
 
				+    header_content += f"**时间：** {now.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
			
 
				+    header_content += "**类型：** 热点分析报告\n\n"
			
 
				+    header_content += "---\n\n"
			
 
				+
			
 
				+    # 生成热点词汇统计部分
			
 
				+    stats_content = ""
			
 
				+    if report_data["stats"]:
			
 
				+        stats_content += "📊 **热点词汇统计**\n\n"
			
 
				+
			
 
				+        total_count = len(report_data["stats"])
			
 
				+
			
 
				+        for i, stat in enumerate(report_data["stats"]):
			
 
				+            word = stat["word"]
			
 
				+            count = stat["count"]
			
 
				+
			
 
				+            sequence_display = f"[{i + 1}/{total_count}]"
			
 
				+
			
 
				+            if count >= 10:
			
 
				+                stats_content += f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
			
 
				+            elif count >= 5:
			
 
				+                stats_content += f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
			
 
				+            else:
			
 
				+                stats_content += f"📌 {sequence_display} **{word}** : {count} 条\n\n"
			
 
				+
			
 
				+            for j, title_data in enumerate(stat["titles"], 1):
			
 
				+                formatted_title = format_title_for_platform(
			
 
				+                    "dingtalk", title_data, show_source=True
			
 
				+                )
			
 
				+                stats_content += f"  {j}. {formatted_title}\n"
			
 
				+
			
 
				+                if j < len(stat["titles"]):
			
 
				+                    stats_content += "\n"
			
 
				+
			
 
				+            if i < len(report_data["stats"]) - 1:
			
 
				+                stats_content += "\n---\n\n"
			
 
				+
			
 
				+    # 生成新增新闻部分
			
 
				+    new_titles_content = ""
			
 
				+    if report_data["new_titles"]:
			
 
				+        new_titles_content += (
			
 
				+            f"🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
			
 
				+        )
			
 
				+
			
 
				+        for source_data in report_data["new_titles"]:
			
 
				+            new_titles_content += f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n"
			
 
				+
			
 
				+            for j, title_data in enumerate(source_data["titles"], 1):
			
 
				+                title_data_copy = title_data.copy()
			
 
				+                title_data_copy["is_new"] = False
			
 
				+                formatted_title = format_title_for_platform(
			
 
				+                    "dingtalk", title_data_copy, show_source=False
			
 
				+                )
			
 
				+                new_titles_content += f"  {j}. {formatted_title}\n"
			
 
				+
			
 
				+            new_titles_content += "\n"
			
 
				+
			
 
				+    # 根据配置决定内容顺序
			
 
				+    text_content = header_content
			
 
				+    if reverse_content_order:
			
 
				+        # 新增热点在前，热点词汇统计在后
			
 
				+        if new_titles_content:
			
 
				+            text_content += new_titles_content
			
 
				+            if stats_content:
			
 
				+                text_content += "\n---\n\n"
			
 
				+        if stats_content:
			
 
				+            text_content += stats_content
			
 
				+    else:
			
 
				+        # 默认：热点词汇统计在前，新增热点在后
			
 
				+        if stats_content:
			
 
				+            text_content += stats_content
			
 
				+            if new_titles_content:
			
 
				+                text_content += "\n---\n\n"
			
 
				+        if new_titles_content:
			
 
				+            text_content += new_titles_content
			
 
				+
			
 
				+    if not stats_content and not new_titles_content:
			
 
				+        if mode == "incremental":
			
 
				+            mode_text = "增量模式下暂无新增匹配的热点词汇"
			
 
				+        elif mode == "current":
			
 
				+            mode_text = "当前榜单模式下暂无匹配的热点词汇"
			
 
				+        else:
			
 
				+            mode_text = "暂无匹配的热点词汇"
			
 
				+        text_content += f"📭 {mode_text}\n\n"
			
 
				+
			
 
				+    if report_data["failed_ids"]:
			
 
				+        if "暂无匹配" not in text_content:
			
 
				+            text_content += "\n---\n\n"
			
 
				+
			
 
				+        text_content += "⚠️ **数据获取失败的平台：**\n\n"
			
 
				+        for i, id_value in enumerate(report_data["failed_ids"], 1):
			
 
				+            text_content += f"  • **{id_value}**\n"
			
 
				+
			
 
				+    text_content += f"\n\n> 更新时间：{now.strftime('%Y-%m-%d %H:%M:%S')}"
			
 
				+
			
 
				+    if update_info:
			
 
				+        text_content += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**，当前 **{update_info['current_version']}**"
			
 
				+
			
 
				+    return text_content
			
--- a/trendradar/notification/senders.py
+++ b/trendradar/notification/senders.py
@@ -0,0 +1,1033 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+消息发送器模块
			
 
				+
			
 
				+将报告数据发送到各种通知渠道：
			
 
				+- 飞书 (Feishu/Lark)
			
 
				+- 钉钉 (DingTalk)
			
 
				+- 企业微信 (WeCom/WeWork)
			
 
				+- Telegram
			
 
				+- 邮件 (Email)
			
 
				+- ntfy
			
 
				+- Bark
			
 
				+- Slack
			
 
				+
			
 
				+每个发送函数都支持分批发送，并通过参数化配置实现与 CONFIG 的解耦。
			
 
				+"""
			
 
				+
			
 
				+import smtplib
			
 
				+import time
			
 
				+from datetime import datetime
			
 
				+from email.header import Header
			
 
				+from email.mime.multipart import MIMEMultipart
			
 
				+from email.mime.text import MIMEText
			
 
				+from email.utils import formataddr, formatdate, make_msgid
			
 
				+from pathlib import Path
			
 
				+from typing import Callable, Dict, List, Optional
			
 
				+from urllib.parse import urlparse
			
 
				+
			
 
				+import requests
			
 
				+
			
 
				+from .batch import add_batch_headers, get_max_batch_header_size
			
 
				+from .formatters import convert_markdown_to_mrkdwn, strip_markdown
			
 
				+
			
 
				+
			
 
				+# === SMTP 邮件配置 ===
			
 
				+SMTP_CONFIGS = {
			
 
				+    # Gmail（使用 STARTTLS）
			
 
				+    "gmail.com": {"server": "smtp.gmail.com", "port": 587, "encryption": "TLS"},
			
 
				+    # QQ邮箱（使用 SSL，更稳定）
			
 
				+    "qq.com": {"server": "smtp.qq.com", "port": 465, "encryption": "SSL"},
			
 
				+    # Outlook（使用 STARTTLS）
			
 
				+    "outlook.com": {"server": "smtp-mail.outlook.com", "port": 587, "encryption": "TLS"},
			
 
				+    "hotmail.com": {"server": "smtp-mail.outlook.com", "port": 587, "encryption": "TLS"},
			
 
				+    "live.com": {"server": "smtp-mail.outlook.com", "port": 587, "encryption": "TLS"},
			
 
				+    # 网易邮箱（使用 SSL，更稳定）
			
 
				+    "163.com": {"server": "smtp.163.com", "port": 465, "encryption": "SSL"},
			
 
				+    "126.com": {"server": "smtp.126.com", "port": 465, "encryption": "SSL"},
			
 
				+    # 新浪邮箱（使用 SSL）
			
 
				+    "sina.com": {"server": "smtp.sina.com", "port": 465, "encryption": "SSL"},
			
 
				+    # 搜狐邮箱（使用 SSL）
			
 
				+    "sohu.com": {"server": "smtp.sohu.com", "port": 465, "encryption": "SSL"},
			
 
				+    # 天翼邮箱（使用 SSL）
			
 
				+    "189.cn": {"server": "smtp.189.cn", "port": 465, "encryption": "SSL"},
			
 
				+    # 阿里云邮箱（使用 TLS）
			
 
				+    "aliyun.com": {"server": "smtp.aliyun.com", "port": 465, "encryption": "TLS"},
			
 
				+}
			
 
				+
			
 
				+
			
 
				+def send_to_feishu(
			
 
				+    webhook_url: str,
			
 
				+    report_data: Dict,
			
 
				+    report_type: str,
			
 
				+    update_info: Optional[Dict] = None,
			
 
				+    proxy_url: Optional[str] = None,
			
 
				+    mode: str = "daily",
			
 
				+    account_label: str = "",
			
 
				+    *,
			
 
				+    batch_size: int = 29000,
			
 
				+    batch_interval: float = 1.0,
			
 
				+    split_content_func: Callable = None,
			
 
				+    get_time_func: Callable = None,
			
 
				+) -> bool:
			
 
				+    """
			
 
				+    发送到飞书（支持分批发送）
			
 
				+
			
 
				+    Args:
			
 
				+        webhook_url: 飞书 Webhook URL
			
 
				+        report_data: 报告数据
			
 
				+        report_type: 报告类型
			
 
				+        update_info: 更新信息（可选）
			
 
				+        proxy_url: 代理 URL（可选）
			
 
				+        mode: 报告模式 (daily/current)
			
 
				+        account_label: 账号标签（多账号时显示）
			
 
				+        batch_size: 批次大小（字节）
			
 
				+        batch_interval: 批次发送间隔（秒）
			
 
				+        split_content_func: 内容分批函数
			
 
				+        get_time_func: 获取当前时间的函数
			
 
				+
			
 
				+    Returns:
			
 
				+        bool: 发送是否成功
			
 
				+    """
			
 
				+    headers = {"Content-Type": "application/json"}
			
 
				+    proxies = None
			
 
				+    if proxy_url:
			
 
				+        proxies = {"http": proxy_url, "https": proxy_url}
			
 
				+
			
 
				+    # 日志前缀
			
 
				+    log_prefix = f"飞书{account_label}" if account_label else "飞书"
			
 
				+
			
 
				+    # 预留批次头部空间，避免添加头部后超限
			
 
				+    header_reserve = get_max_batch_header_size("feishu")
			
 
				+    batches = split_content_func(
			
 
				+        report_data,
			
 
				+        "feishu",
			
 
				+        update_info,
			
 
				+        max_bytes=batch_size - header_reserve,
			
 
				+        mode=mode,
			
 
				+    )
			
 
				+
			
 
				+    # 统一添加批次头部（已预留空间，不会超限）
			
 
				+    batches = add_batch_headers(batches, "feishu", batch_size)
			
 
				+
			
 
				+    print(f"{log_prefix}消息分为 {len(batches)} 批次发送 [{report_type}]")
			
 
				+
			
 
				+    # 逐批发送
			
 
				+    for i, batch_content in enumerate(batches, 1):
			
 
				+        content_size = len(batch_content.encode("utf-8"))
			
 
				+        print(
			
 
				+            f"发送{log_prefix}第 {i}/{len(batches)} 批次，大小：{content_size} 字节 [{report_type}]"
			
 
				+        )
			
 
				+
			
 
				+        total_titles = sum(
			
 
				+            len(stat["titles"]) for stat in report_data["stats"] if stat["count"] > 0
			
 
				+        )
			
 
				+        now = get_time_func() if get_time_func else datetime.now()
			
 
				+
			
 
				+        payload = {
			
 
				+            "msg_type": "text",
			
 
				+            "content": {
			
 
				+                "total_titles": total_titles,
			
 
				+                "timestamp": now.strftime("%Y-%m-%d %H:%M:%S"),
			
 
				+                "report_type": report_type,
			
 
				+                "text": batch_content,
			
 
				+            },
			
 
				+        }
			
 
				+
			
 
				+        try:
			
 
				+            response = requests.post(
			
 
				+                webhook_url, headers=headers, json=payload, proxies=proxies, timeout=30
			
 
				+            )
			
 
				+            if response.status_code == 200:
			
 
				+                result = response.json()
			
 
				+                # 检查飞书的响应状态
			
 
				+                if result.get("StatusCode") == 0 or result.get("code") == 0:
			
 
				+                    print(f"{log_prefix}第 {i}/{len(batches)} 批次发送成功 [{report_type}]")
			
 
				+                    # 批次间间隔
			
 
				+                    if i < len(batches):
			
 
				+                        time.sleep(batch_interval)
			
 
				+                else:
			
 
				+                    error_msg = result.get("msg") or result.get("StatusMessage", "未知错误")
			
 
				+                    print(
			
 
				+                        f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}]，错误：{error_msg}"
			
 
				+                    )
			
 
				+                    return False
			
 
				+            else:
			
 
				+                print(
			
 
				+                    f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}]，状态码：{response.status_code}"
			
 
				+                )
			
 
				+                return False
			
 
				+        except Exception as e:
			
 
				+            print(f"{log_prefix}第 {i}/{len(batches)} 批次发送出错 [{report_type}]：{e}")
			
 
				+            return False
			
 
				+
			
 
				+    print(f"{log_prefix}所有 {len(batches)} 批次发送完成 [{report_type}]")
			
 
				+    return True
			
 
				+
			
 
				+
			
 
				+def send_to_dingtalk(
			
 
				+    webhook_url: str,
			
 
				+    report_data: Dict,
			
 
				+    report_type: str,
			
 
				+    update_info: Optional[Dict] = None,
			
 
				+    proxy_url: Optional[str] = None,
			
 
				+    mode: str = "daily",
			
 
				+    account_label: str = "",
			
 
				+    *,
			
 
				+    batch_size: int = 20000,
			
 
				+    batch_interval: float = 1.0,
			
 
				+    split_content_func: Callable = None,
			
 
				+) -> bool:
			
 
				+    """
			
 
				+    发送到钉钉（支持分批发送）
			
 
				+
			
 
				+    Args:
			
 
				+        webhook_url: 钉钉 Webhook URL
			
 
				+        report_data: 报告数据
			
 
				+        report_type: 报告类型
			
 
				+        update_info: 更新信息（可选）
			
 
				+        proxy_url: 代理 URL（可选）
			
 
				+        mode: 报告模式 (daily/current)
			
 
				+        account_label: 账号标签（多账号时显示）
			
 
				+        batch_size: 批次大小（字节）
			
 
				+        batch_interval: 批次发送间隔（秒）
			
 
				+        split_content_func: 内容分批函数
			
 
				+
			
 
				+    Returns:
			
 
				+        bool: 发送是否成功
			
 
				+    """
			
 
				+    headers = {"Content-Type": "application/json"}
			
 
				+    proxies = None
			
 
				+    if proxy_url:
			
 
				+        proxies = {"http": proxy_url, "https": proxy_url}
			
 
				+
			
 
				+    # 日志前缀
			
 
				+    log_prefix = f"钉钉{account_label}" if account_label else "钉钉"
			
 
				+
			
 
				+    # 预留批次头部空间，避免添加头部后超限
			
 
				+    header_reserve = get_max_batch_header_size("dingtalk")
			
 
				+    batches = split_content_func(
			
 
				+        report_data,
			
 
				+        "dingtalk",
			
 
				+        update_info,
			
 
				+        max_bytes=batch_size - header_reserve,
			
 
				+        mode=mode,
			
 
				+    )
			
 
				+
			
 
				+    # 统一添加批次头部（已预留空间，不会超限）
			
 
				+    batches = add_batch_headers(batches, "dingtalk", batch_size)
			
 
				+
			
 
				+    print(f"{log_prefix}消息分为 {len(batches)} 批次发送 [{report_type}]")
			
 
				+
			
 
				+    # 逐批发送
			
 
				+    for i, batch_content in enumerate(batches, 1):
			
 
				+        content_size = len(batch_content.encode("utf-8"))
			
 
				+        print(
			
 
				+            f"发送{log_prefix}第 {i}/{len(batches)} 批次，大小：{content_size} 字节 [{report_type}]"
			
 
				+        )
			
 
				+
			
 
				+        payload = {
			
 
				+            "msgtype": "markdown",
			
 
				+            "markdown": {
			
 
				+                "title": f"TrendRadar 热点分析报告 - {report_type}",
			
 
				+                "text": batch_content,
			
 
				+            },
			
 
				+        }
			
 
				+
			
 
				+        try:
			
 
				+            response = requests.post(
			
 
				+                webhook_url, headers=headers, json=payload, proxies=proxies, timeout=30
			
 
				+            )
			
 
				+            if response.status_code == 200:
			
 
				+                result = response.json()
			
 
				+                if result.get("errcode") == 0:
			
 
				+                    print(f"{log_prefix}第 {i}/{len(batches)} 批次发送成功 [{report_type}]")
			
 
				+                    # 批次间间隔
			
 
				+                    if i < len(batches):
			
 
				+                        time.sleep(batch_interval)
			
 
				+                else:
			
 
				+                    print(
			
 
				+                        f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}]，错误：{result.get('errmsg')}"
			
 
				+                    )
			
 
				+                    return False
			
 
				+            else:
			
 
				+                print(
			
 
				+                    f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}]，状态码：{response.status_code}"
			
 
				+                )
			
 
				+                return False
			
 
				+        except Exception as e:
			
 
				+            print(f"{log_prefix}第 {i}/{len(batches)} 批次发送出错 [{report_type}]：{e}")
			
 
				+            return False
			
 
				+
			
 
				+    print(f"{log_prefix}所有 {len(batches)} 批次发送完成 [{report_type}]")
			
 
				+    return True
			
 
				+
			
 
				+
			
 
				+def send_to_wework(
			
 
				+    webhook_url: str,
			
 
				+    report_data: Dict,
			
 
				+    report_type: str,
			
 
				+    update_info: Optional[Dict] = None,
			
 
				+    proxy_url: Optional[str] = None,
			
 
				+    mode: str = "daily",
			
 
				+    account_label: str = "",
			
 
				+    *,
			
 
				+    batch_size: int = 4000,
			
 
				+    batch_interval: float = 1.0,
			
 
				+    msg_type: str = "markdown",
			
 
				+    split_content_func: Callable = None,
			
 
				+) -> bool:
			
 
				+    """
			
 
				+    发送到企业微信（支持分批发送，支持 markdown 和 text 两种格式）
			
 
				+
			
 
				+    Args:
			
 
				+        webhook_url: 企业微信 Webhook URL
			
 
				+        report_data: 报告数据
			
 
				+        report_type: 报告类型
			
 
				+        update_info: 更新信息（可选）
			
 
				+        proxy_url: 代理 URL（可选）
			
 
				+        mode: 报告模式 (daily/current)
			
 
				+        account_label: 账号标签（多账号时显示）
			
 
				+        batch_size: 批次大小（字节）
			
 
				+        batch_interval: 批次发送间隔（秒）
			
 
				+        msg_type: 消息类型 (markdown/text)
			
 
				+        split_content_func: 内容分批函数
			
 
				+
			
 
				+    Returns:
			
 
				+        bool: 发送是否成功
			
 
				+    """
			
 
				+    headers = {"Content-Type": "application/json"}
			
 
				+    proxies = None
			
 
				+    if proxy_url:
			
 
				+        proxies = {"http": proxy_url, "https": proxy_url}
			
 
				+
			
 
				+    # 日志前缀
			
 
				+    log_prefix = f"企业微信{account_label}" if account_label else "企业微信"
			
 
				+
			
 
				+    # 获取消息类型配置（markdown 或 text）
			
 
				+    is_text_mode = msg_type.lower() == "text"
			
 
				+
			
 
				+    if is_text_mode:
			
 
				+        print(f"{log_prefix}使用 text 格式（个人微信模式）[{report_type}]")
			
 
				+    else:
			
 
				+        print(f"{log_prefix}使用 markdown 格式（群机器人模式）[{report_type}]")
			
 
				+
			
 
				+    # text 模式使用 wework_text，markdown 模式使用 wework
			
 
				+    header_format_type = "wework_text" if is_text_mode else "wework"
			
 
				+
			
 
				+    # 获取分批内容，预留批次头部空间
			
 
				+    header_reserve = get_max_batch_header_size(header_format_type)
			
 
				+    batches = split_content_func(
			
 
				+        report_data, "wework", update_info, max_bytes=batch_size - header_reserve, mode=mode
			
 
				+    )
			
 
				+
			
 
				+    # 统一添加批次头部（已预留空间，不会超限）
			
 
				+    batches = add_batch_headers(batches, header_format_type, batch_size)
			
 
				+
			
 
				+    print(f"{log_prefix}消息分为 {len(batches)} 批次发送 [{report_type}]")
			
 
				+
			
 
				+    # 逐批发送
			
 
				+    for i, batch_content in enumerate(batches, 1):
			
 
				+        # 根据消息类型构建 payload
			
 
				+        if is_text_mode:
			
 
				+            # text 格式：去除 markdown 语法
			
 
				+            plain_content = strip_markdown(batch_content)
			
 
				+            payload = {"msgtype": "text", "text": {"content": plain_content}}
			
 
				+            content_size = len(plain_content.encode("utf-8"))
			
 
				+        else:
			
 
				+            # markdown 格式：保持原样
			
 
				+            payload = {"msgtype": "markdown", "markdown": {"content": batch_content}}
			
 
				+            content_size = len(batch_content.encode("utf-8"))
			
 
				+
			
 
				+        print(
			
 
				+            f"发送{log_prefix}第 {i}/{len(batches)} 批次，大小：{content_size} 字节 [{report_type}]"
			
 
				+        )
			
 
				+
			
 
				+        try:
			
 
				+            response = requests.post(
			
 
				+                webhook_url, headers=headers, json=payload, proxies=proxies, timeout=30
			
 
				+            )
			
 
				+            if response.status_code == 200:
			
 
				+                result = response.json()
			
 
				+                if result.get("errcode") == 0:
			
 
				+                    print(f"{log_prefix}第 {i}/{len(batches)} 批次发送成功 [{report_type}]")
			
 
				+                    # 批次间间隔
			
 
				+                    if i < len(batches):
			
 
				+                        time.sleep(batch_interval)
			
 
				+                else:
			
 
				+                    print(
			
 
				+                        f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}]，错误：{result.get('errmsg')}"
			
 
				+                    )
			
 
				+                    return False
			
 
				+            else:
			
 
				+                print(
			
 
				+                    f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}]，状态码：{response.status_code}"
			
 
				+                )
			
 
				+                return False
			
 
				+        except Exception as e:
			
 
				+            print(f"{log_prefix}第 {i}/{len(batches)} 批次发送出错 [{report_type}]：{e}")
			
 
				+            return False
			
 
				+
			
 
				+    print(f"{log_prefix}所有 {len(batches)} 批次发送完成 [{report_type}]")
			
 
				+    return True
			
 
				+
			
 
				+
			
 
				+def send_to_telegram(
			
 
				+    bot_token: str,
			
 
				+    chat_id: str,
			
 
				+    report_data: Dict,
			
 
				+    report_type: str,
			
 
				+    update_info: Optional[Dict] = None,
			
 
				+    proxy_url: Optional[str] = None,
			
 
				+    mode: str = "daily",
			
 
				+    account_label: str = "",
			
 
				+    *,
			
 
				+    batch_size: int = 4000,
			
 
				+    batch_interval: float = 1.0,
			
 
				+    split_content_func: Callable = None,
			
 
				+) -> bool:
			
 
				+    """
			
 
				+    发送到 Telegram（支持分批发送）
			
 
				+
			
 
				+    Args:
			
 
				+        bot_token: Telegram Bot Token
			
 
				+        chat_id: Telegram Chat ID
			
 
				+        report_data: 报告数据
			
 
				+        report_type: 报告类型
			
 
				+        update_info: 更新信息（可选）
			
 
				+        proxy_url: 代理 URL（可选）
			
 
				+        mode: 报告模式 (daily/current)
			
 
				+        account_label: 账号标签（多账号时显示）
			
 
				+        batch_size: 批次大小（字节）
			
 
				+        batch_interval: 批次发送间隔（秒）
			
 
				+        split_content_func: 内容分批函数
			
 
				+
			
 
				+    Returns:
			
 
				+        bool: 发送是否成功
			
 
				+    """
			
 
				+    headers = {"Content-Type": "application/json"}
			
 
				+    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
			
 
				+
			
 
				+    proxies = None
			
 
				+    if proxy_url:
			
 
				+        proxies = {"http": proxy_url, "https": proxy_url}
			
 
				+
			
 
				+    # 日志前缀
			
 
				+    log_prefix = f"Telegram{account_label}" if account_label else "Telegram"
			
 
				+
			
 
				+    # 获取分批内容，预留批次头部空间
			
 
				+    header_reserve = get_max_batch_header_size("telegram")
			
 
				+    batches = split_content_func(
			
 
				+        report_data, "telegram", update_info, max_bytes=batch_size - header_reserve, mode=mode
			
 
				+    )
			
 
				+
			
 
				+    # 统一添加批次头部（已预留空间，不会超限）
			
 
				+    batches = add_batch_headers(batches, "telegram", batch_size)
			
 
				+
			
 
				+    print(f"{log_prefix}消息分为 {len(batches)} 批次发送 [{report_type}]")
			
 
				+
			
 
				+    # 逐批发送
			
 
				+    for i, batch_content in enumerate(batches, 1):
			
 
				+        content_size = len(batch_content.encode("utf-8"))
			
 
				+        print(
			
 
				+            f"发送{log_prefix}第 {i}/{len(batches)} 批次，大小：{content_size} 字节 [{report_type}]"
			
 
				+        )
			
 
				+
			
 
				+        payload = {
			
 
				+            "chat_id": chat_id,
			
 
				+            "text": batch_content,
			
 
				+            "parse_mode": "HTML",
			
 
				+            "disable_web_page_preview": True,
			
 
				+        }
			
 
				+
			
 
				+        try:
			
 
				+            response = requests.post(
			
 
				+                url, headers=headers, json=payload, proxies=proxies, timeout=30
			
 
				+            )
			
 
				+            if response.status_code == 200:
			
 
				+                result = response.json()
			
 
				+                if result.get("ok"):
			
 
				+                    print(f"{log_prefix}第 {i}/{len(batches)} 批次发送成功 [{report_type}]")
			
 
				+                    # 批次间间隔
			
 
				+                    if i < len(batches):
			
 
				+                        time.sleep(batch_interval)
			
 
				+                else:
			
 
				+                    print(
			
 
				+                        f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}]，错误：{result.get('description')}"
			
 
				+                    )
			
 
				+                    return False
			
 
				+            else:
			
 
				+                print(
			
 
				+                    f"{log_prefix}第 {i}/{len(batches)} 批次发送失败 [{report_type}]，状态码：{response.status_code}"
			
 
				+                )
			
 
				+                return False
			
 
				+        except Exception as e:
			
 
				+            print(f"{log_prefix}第 {i}/{len(batches)} 批次发送出错 [{report_type}]：{e}")
			
 
				+            return False
			
 
				+
			
 
				+    print(f"{log_prefix}所有 {len(batches)} 批次发送完成 [{report_type}]")
			
 
				+    return True
			
 
				+
			
 
				+
			
 
				+def send_to_email(
			
 
				+    from_email: str,
			
 
				+    password: str,
			
 
				+    to_email: str,
			
 
				+    report_type: str,
			
 
				+    html_file_path: str,
			
 
				+    custom_smtp_server: Optional[str] = None,
			
 
				+    custom_smtp_port: Optional[int] = None,
			
 
				+    *,
			
 
				+    get_time_func: Callable = None,
			
 
				+) -> bool:
			
 
				+    """
			
 
				+    发送邮件通知
			
 
				+
			
 
				+    Args:
			
 
				+        from_email: 发件人邮箱
			
 
				+        password: 邮箱密码/授权码
			
 
				+        to_email: 收件人邮箱（多个用逗号分隔）
			
 
				+        report_type: 报告类型
			
 
				+        html_file_path: HTML 报告文件路径
			
 
				+        custom_smtp_server: 自定义 SMTP 服务器（可选）
			
 
				+        custom_smtp_port: 自定义 SMTP 端口（可选）
			
 
				+        get_time_func: 获取当前时间的函数
			
 
				+
			
 
				+    Returns:
			
 
				+        bool: 发送是否成功
			
 
				+    """
			
 
				+    try:
			
 
				+        if not html_file_path or not Path(html_file_path).exists():
			
 
				+            print(f"错误：HTML文件不存在或未提供: {html_file_path}")
			
 
				+            return False
			
 
				+
			
 
				+        print(f"使用HTML文件: {html_file_path}")
			
 
				+        with open(html_file_path, "r", encoding="utf-8") as f:
			
 
				+            html_content = f.read()
			
 
				+
			
 
				+        domain = from_email.split("@")[-1].lower()
			
 
				+
			
 
				+        if custom_smtp_server and custom_smtp_port:
			
 
				+            # 使用自定义 SMTP 配置
			
 
				+            smtp_server = custom_smtp_server
			
 
				+            smtp_port = int(custom_smtp_port)
			
 
				+            # 根据端口判断加密方式：465=SSL, 587=TLS
			
 
				+            if smtp_port == 465:
			
 
				+                use_tls = False  # SSL 模式（SMTP_SSL）
			
 
				+            elif smtp_port == 587:
			
 
				+                use_tls = True  # TLS 模式（STARTTLS）
			
 
				+            else:
			
 
				+                # 其他端口优先尝试 TLS（更安全，更广泛支持）
			
 
				+                use_tls = True
			
 
				+        elif domain in SMTP_CONFIGS:
			
 
				+            # 使用预设配置
			
 
				+            config = SMTP_CONFIGS[domain]
			
 
				+            smtp_server = config["server"]
			
 
				+            smtp_port = config["port"]
			
 
				+            use_tls = config["encryption"] == "TLS"
			
 
				+        else:
			
 
				+            print(f"未识别的邮箱服务商: {domain}，使用通用 SMTP 配置")
			
 
				+            smtp_server = f"smtp.{domain}"
			
 
				+            smtp_port = 587
			
 
				+            use_tls = True
			
 
				+
			
 
				+        msg = MIMEMultipart("alternative")
			
 
				+
			
 
				+        # 严格按照 RFC 标准设置 From header
			
 
				+        sender_name = "TrendRadar"
			
 
				+        msg["From"] = formataddr((sender_name, from_email))
			
 
				+
			
 
				+        # 设置收件人
			
 
				+        recipients = [addr.strip() for addr in to_email.split(",")]
			
 
				+        if len(recipients) == 1:
			
 
				+            msg["To"] = recipients[0]
			
 
				+        else:
			
 
				+            msg["To"] = ", ".join(recipients)
			
 
				+
			
 
				+        # 设置邮件主题
			
 
				+        now = get_time_func() if get_time_func else datetime.now()
			
 
				+        subject = f"TrendRadar 热点分析报告 - {report_type} - {now.strftime('%m月%d日 %H:%M')}"
			
 
				+        msg["Subject"] = Header(subject, "utf-8")
			
 
				+
			
 
				+        # 设置其他标准 header
			
 
				+        msg["MIME-Version"] = "1.0"
			
 
				+        msg["Date"] = formatdate(localtime=True)
			
 
				+        msg["Message-ID"] = make_msgid()
			
 
				+
			
 
				+        # 添加纯文本部分（作为备选）
			
 
				+        text_content = f"""
			
 
				+TrendRadar 热点分析报告
			
 
				+========================
			
 
				+报告类型：{report_type}
			
 
				+生成时间：{now.strftime('%Y-%m-%d %H:%M:%S')}
			
 
				+
			
 
				+请使用支持HTML的邮件客户端查看完整报告内容。
			
 
				+        """
			
 
				+        text_part = MIMEText(text_content, "plain", "utf-8")
			
 
				+        msg.attach(text_part)
			
 
				+
			
 
				+        html_part = MIMEText(html_content, "html", "utf-8")
			
 
				+        msg.attach(html_part)
			
 
				+
			
 
				+        print(f"正在发送邮件到 {to_email}...")
			
 
				+        print(f"SMTP 服务器: {smtp_server}:{smtp_port}")
			
 
				+        print(f"发件人: {from_email}")
			
 
				+
			
 
				+        try:
			
 
				+            if use_tls:
			
 
				+                # TLS 模式
			
 
				+                server = smtplib.SMTP(smtp_server, smtp_port, timeout=30)
			
 
				+                server.set_debuglevel(0)  # 设为1可以查看详细调试信息
			
 
				+                server.ehlo()
			
 
				+                server.starttls()
			
 
				+                server.ehlo()
			
 
				+            else:
			
 
				+                # SSL 模式
			
 
				+                server = smtplib.SMTP_SSL(smtp_server, smtp_port, timeout=30)
			
 
				+                server.set_debuglevel(0)
			
 
				+                server.ehlo()
			
 
				+
			
 
				+            # 登录
			
 
				+            server.login(from_email, password)
			
 
				+
			
 
				+            # 发送邮件
			
 
				+            server.send_message(msg)
			
 
				+            server.quit()
			
 
				+
			
 
				+            print(f"邮件发送成功 [{report_type}] -> {to_email}")
			
 
				+            return True
			
 
				+
			
 
				+        except smtplib.SMTPServerDisconnected:
			
 
				+            print("邮件发送失败：服务器意外断开连接，请检查网络或稍后重试")
			
 
				+            return False
			
 
				+
			
 
				+    except smtplib.SMTPAuthenticationError as e:
			
 
				+        print("邮件发送失败：认证错误，请检查邮箱和密码/授权码")
			
 
				+        print(f"详细错误: {str(e)}")
			
 
				+        return False
			
 
				+    except smtplib.SMTPRecipientsRefused as e:
			
 
				+        print(f"邮件发送失败：收件人地址被拒绝 {e}")
			
 
				+        return False
			
 
				+    except smtplib.SMTPSenderRefused as e:
			
 
				+        print(f"邮件发送失败：发件人地址被拒绝 {e}")
			
 
				+        return False
			
 
				+    except smtplib.SMTPDataError as e:
			
 
				+        print(f"邮件发送失败：邮件数据错误 {e}")
			
 
				+        return False
			
 
				+    except smtplib.SMTPConnectError as e:
			
 
				+        print(f"邮件发送失败：无法连接到 SMTP 服务器 {smtp_server}:{smtp_port}")
			
 
				+        print(f"详细错误: {str(e)}")
			
 
				+        return False
			
 
				+    except Exception as e:
			
 
				+        print(f"邮件发送失败 [{report_type}]：{e}")
			
 
				+        import traceback
			
 
				+        traceback.print_exc()
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def send_to_ntfy(
			
 
				+    server_url: str,
			
 
				+    topic: str,
			
 
				+    token: Optional[str],
			
 
				+    report_data: Dict,
			
 
				+    report_type: str,
			
 
				+    update_info: Optional[Dict] = None,
			
 
				+    proxy_url: Optional[str] = None,
			
 
				+    mode: str = "daily",
			
 
				+    account_label: str = "",
			
 
				+    *,
			
 
				+    batch_size: int = 3800,
			
 
				+    split_content_func: Optional[Callable] = None,
			
 
				+) -> bool:
			
 
				+    """
			
 
				+    Send a report to ntfy in batches (each batch is meant to stay under 4KB).
			
 
				+
			
 
				+    Batches are pushed in reverse order (last batch first) so that ntfy
			
 
				+    clients, which list the newest message on top, show them in reading order.
			
 
				+
			
 
				+    Args:
			
 
				+        server_url: ntfy server URL; "https://" is prepended when no scheme is given
			
 
				+        topic: ntfy topic
			
 
				+        token: ntfy access token (optional), sent as a Bearer token
			
 
				+        report_data: report data, passed through to split_content_func
			
 
				+        report_type: report type, used in logs and mapped to an English title
			
 
				+        update_info: version update info (optional)
			
 
				+        proxy_url: proxy URL (optional), used for both http and https
			
 
				+        mode: report mode (daily/current)
			
 
				+        account_label: account label appended to the log prefix (multi-account)
			
 
				+        batch_size: batch size in bytes, batch header included
			
 
				+        split_content_func: function that splits the content into batches (required)
			
 
				+
			
 
				+    Returns:
			
 
				+        bool: True if at least one batch was delivered (partial success counts
			
 
				+            as success); False if none was, or if split_content_func is missing
			
 
				+    """
			
 
				+    # Log prefix
			
 
				+    log_prefix = f"ntfy{account_label}" if account_label else "ntfy"
			
 
				+
			
 
				+    # The splitter only defaults to None because it is keyword-only; report the
			
 
				+    # misuse clearly instead of failing with "'NoneType' object is not callable".
			
 
				+    if split_content_func is None:
			
 
				+        print(f"{log_prefix}发送失败 [{report_type}]：未提供 split_content_func")
			
 
				+        return False
			
 
				+
			
 
				+    # Use an English title to avoid HTTP header encoding problems
			
 
				+    report_type_en_map = {
			
 
				+        "当日汇总": "Daily Summary",
			
 
				+        "当前榜单汇总": "Current Ranking",
			
 
				+        "增量更新": "Incremental Update",
			
 
				+        "实时增量": "Realtime Incremental",
			
 
				+        "实时当前榜单": "Realtime Current Ranking",
			
 
				+    }
			
 
				+    report_type_en = report_type_en_map.get(report_type, "News Report")
			
 
				+
			
 
				+    headers = {
			
 
				+        "Content-Type": "text/plain; charset=utf-8",
			
 
				+        "Markdown": "yes",
			
 
				+        "Title": report_type_en,
			
 
				+        "Priority": "default",
			
 
				+        "Tags": "news",
			
 
				+    }
			
 
				+
			
 
				+    if token:
			
 
				+        headers["Authorization"] = f"Bearer {token}"
			
 
				+
			
 
				+    # Build the full URL and make sure it carries a scheme
			
 
				+    base_url = server_url.rstrip("/")
			
 
				+    if not base_url.startswith(("http://", "https://")):
			
 
				+        base_url = f"https://{base_url}"
			
 
				+    url = f"{base_url}/{topic}"
			
 
				+
			
 
				+    proxies = None
			
 
				+    if proxy_url:
			
 
				+        proxies = {"http": proxy_url, "https": proxy_url}
			
 
				+
			
 
				+    # Split the content, reserving room for the batch header
			
 
				+    header_reserve = get_max_batch_header_size("ntfy")
			
 
				+    batches = split_content_func(
			
 
				+        report_data, "ntfy", update_info, max_bytes=batch_size - header_reserve, mode=mode
			
 
				+    )
			
 
				+
			
 
				+    # Add the batch headers (their space was reserved above)
			
 
				+    batches = add_batch_headers(batches, "ntfy", batch_size)
			
 
				+
			
 
				+    total_batches = len(batches)
			
 
				+    print(f"{log_prefix}消息分为 {total_batches} 批次发送 [{report_type}]")
			
 
				+
			
 
				+    # ntfy shows the newest message on top, so push the last batch first
			
 
				+    reversed_batches = list(reversed(batches))
			
 
				+
			
 
				+    print(f"{log_prefix}将按反向顺序推送（最后批次先推送），确保客户端显示顺序正确")
			
 
				+
			
 
				+    # Pause between batches: 2s for the public ntfy.sh server, 1s otherwise
			
 
				+    interval = 2 if "ntfy.sh" in server_url else 1
			
 
				+
			
 
				+    # Send batch by batch (reverse order)
			
 
				+    success_count = 0
			
 
				+    for idx, batch_content in enumerate(reversed_batches, 1):
			
 
				+        # Batch number as the reader sees it
			
 
				+        actual_batch_num = total_batches - idx + 1
			
 
				+
			
 
				+        # Encode once; the same bytes are measured and sent
			
 
				+        body = batch_content.encode("utf-8")
			
 
				+        content_size = len(body)
			
 
				+        print(
			
 
				+            f"发送{log_prefix}第 {actual_batch_num}/{total_batches} 批次（推送顺序: {idx}/{total_batches}），大小：{content_size} 字节 [{report_type}]"
			
 
				+        )
			
 
				+
			
 
				+        # Warn (but still try) when the message exceeds 4KB
			
 
				+        if content_size > 4096:
			
 
				+            print(f"警告：{log_prefix}第 {actual_batch_num} 批次消息过大（{content_size} 字节），可能被拒绝")
			
 
				+
			
 
				+        # Per-batch headers: add the batch marker to the title
			
 
				+        current_headers = headers.copy()
			
 
				+        if total_batches > 1:
			
 
				+            current_headers["Title"] = f"{report_type_en} ({actual_batch_num}/{total_batches})"
			
 
				+
			
 
				+        # Shared by the first attempt and the single retry
			
 
				+        request_kwargs = {
			
 
				+            "headers": current_headers,
			
 
				+            "data": body,
			
 
				+            "proxies": proxies,
			
 
				+            "timeout": 30,
			
 
				+        }
			
 
				+
			
 
				+        try:
			
 
				+            response = requests.post(url, **request_kwargs)
			
 
				+
			
 
				+            if response.status_code == 200:
			
 
				+                print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送成功 [{report_type}]")
			
 
				+                success_count += 1
			
 
				+                if idx < total_batches:
			
 
				+                    time.sleep(interval)
			
 
				+            elif response.status_code == 429:
			
 
				+                print(
			
 
				+                    f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次速率限制 [{report_type}]，等待后重试"
			
 
				+                )
			
 
				+                time.sleep(10)  # wait 10 seconds, then retry once
			
 
				+                retry_response = requests.post(url, **request_kwargs)
			
 
				+                if retry_response.status_code == 200:
			
 
				+                    print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次重试成功 [{report_type}]")
			
 
				+                    success_count += 1
			
 
				+                    # We were just rate limited: keep the normal pause before
			
 
				+                    # the next batch instead of sending it immediately.
			
 
				+                    if idx < total_batches:
			
 
				+                        time.sleep(interval)
			
 
				+                else:
			
 
				+                    print(
			
 
				+                        f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次重试失败，状态码：{retry_response.status_code}"
			
 
				+                    )
			
 
				+            elif response.status_code == 413:
			
 
				+                print(
			
 
				+                    f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次消息过大被拒绝 [{report_type}]，消息大小：{content_size} 字节"
			
 
				+                )
			
 
				+            else:
			
 
				+                print(
			
 
				+                    f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送失败 [{report_type}]，状态码：{response.status_code}"
			
 
				+                )
			
 
				+                try:
			
 
				+                    print(f"错误详情：{response.text}")
			
 
				+                except Exception:
			
 
				+                    # Best effort only: the failure itself was already logged.
			
 
				+                    # (Not a bare except, so Ctrl-C / SystemExit still propagate.)
			
 
				+                    pass
			
 
				+
			
 
				+        except requests.exceptions.ConnectTimeout:
			
 
				+            print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次连接超时 [{report_type}]")
			
 
				+        except requests.exceptions.ReadTimeout:
			
 
				+            print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次读取超时 [{report_type}]")
			
 
				+        except requests.exceptions.ConnectionError as e:
			
 
				+            print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次连接错误 [{report_type}]：{e}")
			
 
				+        except Exception as e:
			
 
				+            print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送异常 [{report_type}]：{e}")
			
 
				+
			
 
				+    # Overall result
			
 
				+    if success_count == total_batches:
			
 
				+        print(f"{log_prefix}所有 {total_batches} 批次发送完成 [{report_type}]")
			
 
				+        return True
			
 
				+    elif success_count > 0:
			
 
				+        print(f"{log_prefix}部分发送成功：{success_count}/{total_batches} 批次 [{report_type}]")
			
 
				+        return True  # partial success is deliberately treated as success
			
 
				+    else:
			
 
				+        print(f"{log_prefix}发送完全失败 [{report_type}]")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def send_to_bark(
			
 
				+    bark_url: str,
			
 
				+    report_data: Dict,
			
 
				+    report_type: str,
			
 
				+    update_info: Optional[Dict] = None,
			
 
				+    proxy_url: Optional[str] = None,
			
 
				+    mode: str = "daily",
			
 
				+    account_label: str = "",
			
 
				+    *,
			
 
				+    batch_size: int = 3600,
			
 
				+    batch_interval: float = 1.0,
			
 
				+    split_content_func: Optional[Callable] = None,
			
 
				+) -> bool:
			
 
				+    """
			
 
				+    Send a report to Bark in batches, using the markdown payload field.
			
 
				+
			
 
				+    The device key is taken from the first path segment of bark_url and the
			
 
				+    request is posted as JSON to "<scheme>://<host>/push". Batches are pushed
			
 
				+    in reverse order (last batch first) so the client shows them in reading order.
			
 
				+
			
 
				+    Args:
			
 
				+        bark_url: Bark URL including the device key, e.g. https://api.day.app/device_key
			
 
				+        report_data: report data, passed through to split_content_func
			
 
				+        report_type: report type, used in logs and as the push title
			
 
				+        update_info: version update info (optional)
			
 
				+        proxy_url: proxy URL (optional), used for both http and https
			
 
				+        mode: report mode (daily/current)
			
 
				+        account_label: account label appended to the log prefix (multi-account)
			
 
				+        batch_size: batch size in bytes, batch header included
			
 
				+        batch_interval: pause between batches in seconds
			
 
				+        split_content_func: function that splits the content into batches (required)
			
 
				+
			
 
				+    Returns:
			
 
				+        bool: True if at least one batch was delivered (partial success counts
			
 
				+            as success); False if none was, the URL is unusable, or
			
 
				+            split_content_func is missing
			
 
				+    """
			
 
				+    # Log prefix
			
 
				+    log_prefix = f"Bark{account_label}" if account_label else "Bark"
			
 
				+
			
 
				+    # The splitter only defaults to None because it is keyword-only; report the
			
 
				+    # misuse clearly instead of failing with "'NoneType' object is not callable".
			
 
				+    if split_content_func is None:
			
 
				+        print(f"{log_prefix}发送失败 [{report_type}]：未提供 split_content_func")
			
 
				+        return False
			
 
				+
			
 
				+    proxies = None
			
 
				+    if proxy_url:
			
 
				+        proxies = {"http": proxy_url, "https": proxy_url}
			
 
				+
			
 
				+    # Parse the Bark URL to get the device key and the API endpoint
			
 
				+    # Bark URL format: https://api.day.app/device_key or https://bark.day.app/device_key
			
 
				+    parsed_url = urlparse(bark_url)
			
 
				+
			
 
				+    # Without a scheme urlparse puts the host into .path, which would turn the
			
 
				+    # host name into the "device key" and the endpoint into ":///push".
			
 
				+    # The URL is not echoed here because in that case it contains the device key.
			
 
				+    if not parsed_url.scheme or not parsed_url.netloc:
			
 
				+        print(f"{log_prefix} URL 格式错误，缺少协议（http/https）或主机名")
			
 
				+        return False
			
 
				+
			
 
				+    device_key = parsed_url.path.strip('/').split('/')[0] if parsed_url.path else None
			
 
				+
			
 
				+    if not device_key:
			
 
				+        print(f"{log_prefix} URL 格式错误，无法提取 device_key: {bark_url}")
			
 
				+        return False
			
 
				+
			
 
				+    # Build the API endpoint
			
 
				+    api_endpoint = f"{parsed_url.scheme}://{parsed_url.netloc}/push"
			
 
				+
			
 
				+    # Split the content, reserving room for the batch header
			
 
				+    header_reserve = get_max_batch_header_size("bark")
			
 
				+    batches = split_content_func(
			
 
				+        report_data, "bark", update_info, max_bytes=batch_size - header_reserve, mode=mode
			
 
				+    )
			
 
				+
			
 
				+    # Add the batch headers (their space was reserved above)
			
 
				+    batches = add_batch_headers(batches, "bark", batch_size)
			
 
				+
			
 
				+    total_batches = len(batches)
			
 
				+    print(f"{log_prefix}消息分为 {total_batches} 批次发送 [{report_type}]")
			
 
				+
			
 
				+    # Bark shows the newest message on top, so push the last batch first
			
 
				+    reversed_batches = list(reversed(batches))
			
 
				+
			
 
				+    print(f"{log_prefix}将按反向顺序推送（最后批次先推送），确保客户端显示顺序正确")
			
 
				+
			
 
				+    # Send batch by batch (reverse order)
			
 
				+    success_count = 0
			
 
				+    for idx, batch_content in enumerate(reversed_batches, 1):
			
 
				+        # Batch number as the reader sees it
			
 
				+        actual_batch_num = total_batches - idx + 1
			
 
				+
			
 
				+        content_size = len(batch_content.encode("utf-8"))
			
 
				+        print(
			
 
				+            f"发送{log_prefix}第 {actual_batch_num}/{total_batches} 批次（推送顺序: {idx}/{total_batches}），大小：{content_size} 字节 [{report_type}]"
			
 
				+        )
			
 
				+
			
 
				+        # Warn (but still try) above 4KB: Bark goes through APNs, which caps payloads
			
 
				+        if content_size > 4096:
			
 
				+            print(
			
 
				+                f"警告：{log_prefix}第 {actual_batch_num}/{total_batches} 批次消息过大（{content_size} 字节），可能被拒绝"
			
 
				+            )
			
 
				+
			
 
				+        # Build the JSON payload
			
 
				+        payload = {
			
 
				+            "title": report_type,
			
 
				+            "markdown": batch_content,
			
 
				+            "device_key": device_key,
			
 
				+            "sound": "default",
			
 
				+            "group": "TrendRadar",
			
 
				+            "action": "none",  # tapping the push opens the app without a popup
			
 
				+        }
			
 
				+
			
 
				+        try:
			
 
				+            response = requests.post(
			
 
				+                api_endpoint,
			
 
				+                json=payload,
			
 
				+                proxies=proxies,
			
 
				+                timeout=30,
			
 
				+            )
			
 
				+
			
 
				+            if response.status_code == 200:
			
 
				+                # A non-JSON body raises here and is reported by the generic handler below
			
 
				+                result = response.json()
			
 
				+                if result.get("code") == 200:
			
 
				+                    print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送成功 [{report_type}]")
			
 
				+                    success_count += 1
			
 
				+                    # Pause between batches
			
 
				+                    if idx < total_batches:
			
 
				+                        time.sleep(batch_interval)
			
 
				+                else:
			
 
				+                    print(
			
 
				+                        f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送失败 [{report_type}]，错误：{result.get('message', '未知错误')}"
			
 
				+                    )
			
 
				+            else:
			
 
				+                print(
			
 
				+                    f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送失败 [{report_type}]，状态码：{response.status_code}"
			
 
				+                )
			
 
				+                try:
			
 
				+                    print(f"错误详情：{response.text}")
			
 
				+                except Exception:
			
 
				+                    # Best effort only: the failure itself was already logged.
			
 
				+                    # (Not a bare except, so Ctrl-C / SystemExit still propagate.)
			
 
				+                    pass
			
 
				+
			
 
				+        except requests.exceptions.ConnectTimeout:
			
 
				+            print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次连接超时 [{report_type}]")
			
 
				+        except requests.exceptions.ReadTimeout:
			
 
				+            print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次读取超时 [{report_type}]")
			
 
				+        except requests.exceptions.ConnectionError as e:
			
 
				+            print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次连接错误 [{report_type}]：{e}")
			
 
				+        except Exception as e:
			
 
				+            print(f"{log_prefix}第 {actual_batch_num}/{total_batches} 批次发送异常 [{report_type}]：{e}")
			
 
				+
			
 
				+    # Overall result
			
 
				+    if success_count == total_batches:
			
 
				+        print(f"{log_prefix}所有 {total_batches} 批次发送完成 [{report_type}]")
			
 
				+        return True
			
 
				+    elif success_count > 0:
			
 
				+        print(f"{log_prefix}部分发送成功：{success_count}/{total_batches} 批次 [{report_type}]")
			
 
				+        return True  # partial success is deliberately treated as success
			
 
				+    else:
			
 
				+        print(f"{log_prefix}发送完全失败 [{report_type}]")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def send_to_slack(
			
 
				+    webhook_url: str,
			
 
				+    report_data: Dict,
			
 
				+    report_type: str,
			
 
				+    update_info: Optional[Dict] = None,
			
 
				+    proxy_url: Optional[str] = None,
			
 
				+    mode: str = "daily",
			
 
				+    account_label: str = "",
			
 
				+    *,
			
 
				+    batch_size: int = 4000,
			
 
				+    batch_interval: float = 1.0,
			
 
				+    split_content_func: Optional[Callable] = None,
			
 
				+) -> bool:
			
 
				+    """
			
 
				+    Send a report to Slack in batches, converted to Slack's mrkdwn format.
			
 
				+
			
 
				+    Batches are sent in order, and sending stops at the first batch that fails.
			
 
				+
			
 
				+    Args:
			
 
				+        webhook_url: Slack Webhook URL
			
 
				+        report_data: report data, passed through to split_content_func
			
 
				+        report_type: report type, used in logs
			
 
				+        update_info: version update info (optional)
			
 
				+        proxy_url: proxy URL (optional), used for both http and https
			
 
				+        mode: report mode (daily/current)
			
 
				+        account_label: account label appended to the log prefix (multi-account)
			
 
				+        batch_size: batch size in bytes, batch header included
			
 
				+        batch_interval: pause between batches in seconds
			
 
				+        split_content_func: function that splits the content into batches (required)
			
 
				+
			
 
				+    Returns:
			
 
				+        bool: True only if every batch was delivered; False on the first
			
 
				+            failed batch or if split_content_func is missing
			
 
				+    """
			
 
				+    # Log prefix
			
 
				+    log_prefix = f"Slack{account_label}" if account_label else "Slack"
			
 
				+
			
 
				+    # The splitter only defaults to None because it is keyword-only; report the
			
 
				+    # misuse clearly instead of failing with "'NoneType' object is not callable".
			
 
				+    if split_content_func is None:
			
 
				+        print(f"{log_prefix}发送失败 [{report_type}]：未提供 split_content_func")
			
 
				+        return False
			
 
				+
			
 
				+    headers = {"Content-Type": "application/json"}
			
 
				+    proxies = None
			
 
				+    if proxy_url:
			
 
				+        proxies = {"http": proxy_url, "https": proxy_url}
			
 
				+
			
 
				+    # Split the content, reserving room for the batch header
			
 
				+    header_reserve = get_max_batch_header_size("slack")
			
 
				+    batches = split_content_func(
			
 
				+        report_data, "slack", update_info, max_bytes=batch_size - header_reserve, mode=mode
			
 
				+    )
			
 
				+
			
 
				+    # Add the batch headers (their space was reserved above)
			
 
				+    batches = add_batch_headers(batches, "slack", batch_size)
			
 
				+
			
 
				+    total_batches = len(batches)
			
 
				+    print(f"{log_prefix}消息分为 {total_batches} 批次发送 [{report_type}]")
			
 
				+
			
 
				+    # Send batch by batch
			
 
				+    for i, batch_content in enumerate(batches, 1):
			
 
				+        # Convert Markdown to Slack mrkdwn
			
 
				+        mrkdwn_content = convert_markdown_to_mrkdwn(batch_content)
			
 
				+
			
 
				+        content_size = len(mrkdwn_content.encode("utf-8"))
			
 
				+        print(
			
 
				+            f"发送{log_prefix}第 {i}/{total_batches} 批次，大小：{content_size} 字节 [{report_type}]"
			
 
				+        )
			
 
				+
			
 
				+        # Slack payload: the plain "text" field supports mrkdwn
			
 
				+        payload = {"text": mrkdwn_content}
			
 
				+
			
 
				+        try:
			
 
				+            response = requests.post(
			
 
				+                webhook_url, headers=headers, json=payload, proxies=proxies, timeout=30
			
 
				+            )
			
 
				+
			
 
				+            # Slack Incoming Webhooks answer with the plain text "ok" on success
			
 
				+            if response.status_code == 200 and response.text == "ok":
			
 
				+                print(f"{log_prefix}第 {i}/{total_batches} 批次发送成功 [{report_type}]")
			
 
				+                # Pause between batches
			
 
				+                if i < total_batches:
			
 
				+                    time.sleep(batch_interval)
			
 
				+            else:
			
 
				+                error_msg = response.text if response.text else f"状态码：{response.status_code}"
			
 
				+                print(
			
 
				+                    f"{log_prefix}第 {i}/{total_batches} 批次发送失败 [{report_type}]，错误：{error_msg}"
			
 
				+                )
			
 
				+                return False
			
 
				+        except Exception as e:
			
 
				+            print(f"{log_prefix}第 {i}/{total_batches} 批次发送出错 [{report_type}]：{e}")
			
 
				+            return False
			
 
				+
			
 
				+    print(f"{log_prefix}所有 {total_batches} 批次发送完成 [{report_type}]")
			
 
				+    return True
			
--- a/trendradar/notification/splitter.py
+++ b/trendradar/notification/splitter.py
@@ -0,0 +1,580 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+Message batching module
			
 
				+
			
 
				+Splits message content into batches so that each message stays within the
			
 
				+size limit of the target platform.
			
 
				+"""
			
 
				+
			
 
				+from datetime import datetime
			
 
				+from typing import Dict, List, Optional, Callable
			
 
				+
			
 
				+from trendradar.report.formatter import format_title_for_platform
			
 
				+
			
 
				+
			
 
				+# Default batch sizes in bytes, keyed by format type.
			
 
				+# split_content_into_batches falls back to these when called without max_bytes;
			
 
				+# "default" covers every format type that has no entry of its own.
			
 
				+DEFAULT_BATCH_SIZES = {
			
 
				+    "dingtalk": 20000,
			
 
				+    "feishu": 29000,
			
 
				+    "ntfy": 3800,
			
 
				+    "default": 4000,
			
 
				+}
			
 
				+
			
 
				+
			
 
				+def split_content_into_batches(
			
 
				+    report_data: Dict,
			
 
				+    format_type: str,
			
 
				+    update_info: Optional[Dict] = None,
			
 
				+    max_bytes: Optional[int] = None,
			
 
				+    mode: str = "daily",
			
 
				+    batch_sizes: Optional[Dict[str, int]] = None,
			
 
				+    feishu_separator: str = "---",
			
 
				+    reverse_content_order: bool = False,
			
 
				+    get_time_func: Optional[Callable[[], datetime]] = None,
			
 
				+) -> List[str]:
			
 
				+    """分批处理消息内容，确保词组标题+至少第一条新闻的完整性
			
 
				+
			
 
				+    Args:
			
 
				+        report_data: 报告数据字典，包含 stats, new_titles, failed_ids, total_new_count
			
 
				+        format_type: 格式类型 (feishu, dingtalk, wework, telegram, ntfy, bark, slack)
			
 
				+        update_info: 版本更新信息（可选）
			
 
				+        max_bytes: 最大字节数（可选，如果不指定则使用默认配置）
			
 
				+        mode: 报告模式 (daily, incremental, current)
			
 
				+        batch_sizes: 批次大小配置字典（可选）
			
 
				+        feishu_separator: 飞书消息分隔符
			
 
				+        reverse_content_order: 是否反转内容顺序（新增在前）
			
 
				+        get_time_func: 获取当前时间的函数（可选）
			
 
				+
			
 
				+    Returns:
			
 
				+        分批后的消息内容列表
			
 
				+    """
			
 
				+    # 合并批次大小配置
			
 
				+    sizes = {**DEFAULT_BATCH_SIZES, **(batch_sizes or {})}
			
 
				+
			
 
				+    if max_bytes is None:
			
 
				+        if format_type == "dingtalk":
			
 
				+            max_bytes = sizes.get("dingtalk", 20000)
			
 
				+        elif format_type == "feishu":
			
 
				+            max_bytes = sizes.get("feishu", 29000)
			
 
				+        elif format_type == "ntfy":
			
 
				+            max_bytes = sizes.get("ntfy", 3800)
			
 
				+        else:
			
 
				+            max_bytes = sizes.get("default", 4000)
			
 
				+
			
 
				+    batches = []
			
 
				+
			
 
				+    total_titles = sum(
			
 
				+        len(stat["titles"]) for stat in report_data["stats"] if stat["count"] > 0
			
 
				+    )
			
 
				+    now = get_time_func() if get_time_func else datetime.now()
			
 
				+
			
 
				+    base_header = ""
			
 
				+    if format_type in ("wework", "bark"):
			
 
				+        base_header = f"**总新闻数：** {total_titles}\n\n\n\n"
			
 
				+    elif format_type == "telegram":
			
 
				+        base_header = f"总新闻数： {total_titles}\n\n"
			
 
				+    elif format_type == "ntfy":
			
 
				+        base_header = f"**总新闻数：** {total_titles}\n\n"
			
 
				+    elif format_type == "feishu":
			
 
				+        base_header = ""
			
 
				+    elif format_type == "dingtalk":
			
 
				+        base_header = f"**总新闻数：** {total_titles}\n\n"
			
 
				+        base_header += f"**时间：** {now.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
			
 
				+        base_header += f"**类型：** 热点分析报告\n\n"
			
 
				+        base_header += "---\n\n"
			
 
				+    elif format_type == "slack":
			
 
				+        base_header = f"*总新闻数：* {total_titles}\n\n"
			
 
				+
			
 
				+    base_footer = ""
			
 
				+    if format_type in ("wework", "bark"):
			
 
				+        base_footer = f"\n\n\n> 更新时间：{now.strftime('%Y-%m-%d %H:%M:%S')}"
			
 
				+        if update_info:
			
 
				+            base_footer += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**，当前 **{update_info['current_version']}**"
			
 
				+    elif format_type == "telegram":
			
 
				+        base_footer = f"\n\n更新时间：{now.strftime('%Y-%m-%d %H:%M:%S')}"
			
 
				+        if update_info:
			
 
				+            base_footer += f"\nTrendRadar 发现新版本 {update_info['remote_version']}，当前 {update_info['current_version']}"
			
 
				+    elif format_type == "ntfy":
			
 
				+        base_footer = f"\n\n> 更新时间：{now.strftime('%Y-%m-%d %H:%M:%S')}"
			
 
				+        if update_info:
			
 
				+            base_footer += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**，当前 **{update_info['current_version']}**"
			
 
				+    elif format_type == "feishu":
			
 
				+        base_footer = f"\n\n<font color='grey'>更新时间：{now.strftime('%Y-%m-%d %H:%M:%S')}</font>"
			
 
				+        if update_info:
			
 
				+            base_footer += f"\n<font color='grey'>TrendRadar 发现新版本 {update_info['remote_version']}，当前 {update_info['current_version']}</font>"
			
 
				+    elif format_type == "dingtalk":
			
 
				+        base_footer = f"\n\n> 更新时间：{now.strftime('%Y-%m-%d %H:%M:%S')}"
			
 
				+        if update_info:
			
 
				+            base_footer += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**，当前 **{update_info['current_version']}**"
			
 
				+    elif format_type == "slack":
			
 
				+        base_footer = f"\n\n_更新时间：{now.strftime('%Y-%m-%d %H:%M:%S')}_"
			
 
				+        if update_info:
			
 
				+            base_footer += f"\n_TrendRadar 发现新版本 *{update_info['remote_version']}*，当前 *{update_info['current_version']}_"
			
 
				+
			
 
				+    stats_header = ""
			
 
				+    if report_data["stats"]:
			
 
				+        if format_type in ("wework", "bark"):
			
 
				+            stats_header = f"📊 **热点词汇统计**\n\n"
			
 
				+        elif format_type == "telegram":
			
 
				+            stats_header = f"📊 热点词汇统计\n\n"
			
 
				+        elif format_type == "ntfy":
			
 
				+            stats_header = f"📊 **热点词汇统计**\n\n"
			
 
				+        elif format_type == "feishu":
			
 
				+            stats_header = f"📊 **热点词汇统计**\n\n"
			
 
				+        elif format_type == "dingtalk":
			
 
				+            stats_header = f"📊 **热点词汇统计**\n\n"
			
 
				+        elif format_type == "slack":
			
 
				+            stats_header = f"📊 *热点词汇统计*\n\n"
			
 
				+
			
 
				+    current_batch = base_header
			
 
				+    current_batch_has_content = False
			
 
				+
			
 
				+    if (
			
 
				+        not report_data["stats"]
			
 
				+        and not report_data["new_titles"]
			
 
				+        and not report_data["failed_ids"]
			
 
				+    ):
			
 
				+        if mode == "incremental":
			
 
				+            mode_text = "增量模式下暂无新增匹配的热点词汇"
			
 
				+        elif mode == "current":
			
 
				+            mode_text = "当前榜单模式下暂无匹配的热点词汇"
			
 
				+        else:
			
 
				+            mode_text = "暂无匹配的热点词汇"
			
 
				+        simple_content = f"📭 {mode_text}\n\n"
			
 
				+        final_content = base_header + simple_content + base_footer
			
 
				+        batches.append(final_content)
			
 
				+        return batches
			
 
				+
			
 
				+    # 定义处理热点词汇统计的函数
			
 
				+    def process_stats_section(current_batch, current_batch_has_content, batches):
			
 
				+        """处理热点词汇统计"""
			
 
				+        if not report_data["stats"]:
			
 
				+            return current_batch, current_batch_has_content, batches
			
 
				+
			
 
				+        total_count = len(report_data["stats"])
			
 
				+
			
 
				+        # 添加统计标题
			
 
				+        test_content = current_batch + stats_header
			
 
				+        if (
			
 
				+            len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
			
 
				+            < max_bytes
			
 
				+        ):
			
 
				+            current_batch = test_content
			
 
				+            current_batch_has_content = True
			
 
				+        else:
			
 
				+            if current_batch_has_content:
			
 
				+                batches.append(current_batch + base_footer)
			
 
				+            current_batch = base_header + stats_header
			
 
				+            current_batch_has_content = True
			
 
				+
			
 
				+        # 逐个处理词组（确保词组标题+第一条新闻的原子性）
			
 
				+        for i, stat in enumerate(report_data["stats"]):
			
 
				+            word = stat["word"]
			
 
				+            count = stat["count"]
			
 
				+            sequence_display = f"[{i + 1}/{total_count}]"
			
 
				+
			
 
				+            # 构建词组标题
			
 
				+            word_header = ""
			
 
				+            if format_type in ("wework", "bark"):
			
 
				+                if count >= 10:
			
 
				+                    word_header = (
			
 
				+                        f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
			
 
				+                    )
			
 
				+                elif count >= 5:
			
 
				+                    word_header = (
			
 
				+                        f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
			
 
				+                    )
			
 
				+                else:
			
 
				+                    word_header = f"📌 {sequence_display} **{word}** : {count} 条\n\n"
			
 
				+            elif format_type == "telegram":
			
 
				+                if count >= 10:
			
 
				+                    word_header = f"🔥 {sequence_display} {word} : {count} 条\n\n"
			
 
				+                elif count >= 5:
			
 
				+                    word_header = f"📈 {sequence_display} {word} : {count} 条\n\n"
			
 
				+                else:
			
 
				+                    word_header = f"📌 {sequence_display} {word} : {count} 条\n\n"
			
 
				+            elif format_type == "ntfy":
			
 
				+                if count >= 10:
			
 
				+                    word_header = (
			
 
				+                        f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
			
 
				+                    )
			
 
				+                elif count >= 5:
			
 
				+                    word_header = (
			
 
				+                        f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
			
 
				+                    )
			
 
				+                else:
			
 
				+                    word_header = f"📌 {sequence_display} **{word}** : {count} 条\n\n"
			
 
				+            elif format_type == "feishu":
			
 
				+                if count >= 10:
			
 
				+                    word_header = f"🔥 <font color='grey'>{sequence_display}</font> **{word}** : <font color='red'>{count}</font> 条\n\n"
			
 
				+                elif count >= 5:
			
 
				+                    word_header = f"📈 <font color='grey'>{sequence_display}</font> **{word}** : <font color='orange'>{count}</font> 条\n\n"
			
 
				+                else:
			
 
				+                    word_header = f"📌 <font color='grey'>{sequence_display}</font> **{word}** : {count} 条\n\n"
			
 
				+            elif format_type == "dingtalk":
			
 
				+                if count >= 10:
			
 
				+                    word_header = (
			
 
				+                        f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
			
 
				+                    )
			
 
				+                elif count >= 5:
			
 
				+                    word_header = (
			
 
				+                        f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
			
 
				+                    )
			
 
				+                else:
			
 
				+                    word_header = f"📌 {sequence_display} **{word}** : {count} 条\n\n"
			
 
				+            elif format_type == "slack":
			
 
				+                if count >= 10:
			
 
				+                    word_header = (
			
 
				+                        f"🔥 {sequence_display} *{word}* : *{count}* 条\n\n"
			
 
				+                    )
			
 
				+                elif count >= 5:
			
 
				+                    word_header = (
			
 
				+                        f"📈 {sequence_display} *{word}* : *{count}* 条\n\n"
			
 
				+                    )
			
 
				+                else:
			
 
				+                    word_header = f"📌 {sequence_display} *{word}* : {count} 条\n\n"
			
 
				+
			
 
				+            # 构建第一条新闻
			
 
				+            first_news_line = ""
			
 
				+            if stat["titles"]:
			
 
				+                first_title_data = stat["titles"][0]
			
 
				+                if format_type in ("wework", "bark"):
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "wework", first_title_data, show_source=True
			
 
				+                    )
			
 
				+                elif format_type == "telegram":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "telegram", first_title_data, show_source=True
			
 
				+                    )
			
 
				+                elif format_type == "ntfy":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "ntfy", first_title_data, show_source=True
			
 
				+                    )
			
 
				+                elif format_type == "feishu":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "feishu", first_title_data, show_source=True
			
 
				+                    )
			
 
				+                elif format_type == "dingtalk":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "dingtalk", first_title_data, show_source=True
			
 
				+                    )
			
 
				+                elif format_type == "slack":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "slack", first_title_data, show_source=True
			
 
				+                    )
			
 
				+                else:
			
 
				+                    formatted_title = f"{first_title_data['title']}"
			
 
				+
			
 
				+                first_news_line = f"  1. {formatted_title}\n"
			
 
				+                if len(stat["titles"]) > 1:
			
 
				+                    first_news_line += "\n"
			
 
				+
			
 
				+            # 原子性检查：词组标题+第一条新闻必须一起处理
			
 
				+            word_with_first_news = word_header + first_news_line
			
 
				+            test_content = current_batch + word_with_first_news
			
 
				+
			
 
				+            if (
			
 
				+                len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
			
 
				+                >= max_bytes
			
 
				+            ):
			
 
				+                # 当前批次容纳不下，开启新批次
			
 
				+                if current_batch_has_content:
			
 
				+                    batches.append(current_batch + base_footer)
			
 
				+                current_batch = base_header + stats_header + word_with_first_news
			
 
				+                current_batch_has_content = True
			
 
				+                start_index = 1
			
 
				+            else:
			
 
				+                current_batch = test_content
			
 
				+                current_batch_has_content = True
			
 
				+                start_index = 1
			
 
				+
			
 
				+            # 处理剩余新闻条目
			
 
				+            for j in range(start_index, len(stat["titles"])):
			
 
				+                title_data = stat["titles"][j]
			
 
				+                if format_type in ("wework", "bark"):
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "wework", title_data, show_source=True
			
 
				+                    )
			
 
				+                elif format_type == "telegram":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "telegram", title_data, show_source=True
			
 
				+                    )
			
 
				+                elif format_type == "ntfy":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "ntfy", title_data, show_source=True
			
 
				+                    )
			
 
				+                elif format_type == "feishu":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "feishu", title_data, show_source=True
			
 
				+                    )
			
 
				+                elif format_type == "dingtalk":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "dingtalk", title_data, show_source=True
			
 
				+                    )
			
 
				+                elif format_type == "slack":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "slack", title_data, show_source=True
			
 
				+                    )
			
 
				+                else:
			
 
				+                    formatted_title = f"{title_data['title']}"
			
 
				+
			
 
				+                news_line = f"  {j + 1}. {formatted_title}\n"
			
 
				+                if j < len(stat["titles"]) - 1:
			
 
				+                    news_line += "\n"
			
 
				+
			
 
				+                test_content = current_batch + news_line
			
 
				+                if (
			
 
				+                    len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
			
 
				+                    >= max_bytes
			
 
				+                ):
			
 
				+                    if current_batch_has_content:
			
 
				+                        batches.append(current_batch + base_footer)
			
 
				+                    current_batch = base_header + stats_header + word_header + news_line
			
 
				+                    current_batch_has_content = True
			
 
				+                else:
			
 
				+                    current_batch = test_content
			
 
				+                    current_batch_has_content = True
			
 
				+
			
 
				+            # 词组间分隔符
			
 
				+            if i < len(report_data["stats"]) - 1:
			
 
				+                separator = ""
			
 
				+                if format_type in ("wework", "bark"):
			
 
				+                    separator = f"\n\n\n\n"
			
 
				+                elif format_type == "telegram":
			
 
				+                    separator = f"\n\n"
			
 
				+                elif format_type == "ntfy":
			
 
				+                    separator = f"\n\n"
			
 
				+                elif format_type == "feishu":
			
 
				+                    separator = f"\n{feishu_separator}\n\n"
			
 
				+                elif format_type == "dingtalk":
			
 
				+                    separator = f"\n---\n\n"
			
 
				+                elif format_type == "slack":
			
 
				+                    separator = f"\n\n"
			
 
				+
			
 
				+                test_content = current_batch + separator
			
 
				+                if (
			
 
				+                    len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
			
 
				+                    < max_bytes
			
 
				+                ):
			
 
				+                    current_batch = test_content
			
 
				+
			
 
				+        return current_batch, current_batch_has_content, batches
			
 
				+
			
 
				+    # 定义处理新增新闻的函数
			
 
				+    def process_new_titles_section(current_batch, current_batch_has_content, batches):
			
 
				+        """处理新增新闻"""
			
 
				+        if not report_data["new_titles"]:
			
 
				+            return current_batch, current_batch_has_content, batches
			
 
				+
			
 
				+        new_header = ""
			
 
				+        if format_type in ("wework", "bark"):
			
 
				+            new_header = f"\n\n\n\n🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
			
 
				+        elif format_type == "telegram":
			
 
				+            new_header = (
			
 
				+                f"\n\n🆕 本次新增热点新闻 (共 {report_data['total_new_count']} 条)\n\n"
			
 
				+            )
			
 
				+        elif format_type == "ntfy":
			
 
				+            new_header = f"\n\n🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
			
 
				+        elif format_type == "feishu":
			
 
				+            new_header = f"\n{feishu_separator}\n\n🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
			
 
				+        elif format_type == "dingtalk":
			
 
				+            new_header = f"\n---\n\n🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
			
 
				+        elif format_type == "slack":
			
 
				+            new_header = f"\n\n🆕 *本次新增热点新闻* (共 {report_data['total_new_count']} 条)\n\n"
			
 
				+
			
 
				+        test_content = current_batch + new_header
			
 
				+        if (
			
 
				+            len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
			
 
				+            >= max_bytes
			
 
				+        ):
			
 
				+            if current_batch_has_content:
			
 
				+                batches.append(current_batch + base_footer)
			
 
				+            current_batch = base_header + new_header
			
 
				+            current_batch_has_content = True
			
 
				+        else:
			
 
				+            current_batch = test_content
			
 
				+            current_batch_has_content = True
			
 
				+
			
 
				+        # 逐个处理新增新闻来源
			
 
				+        for source_data in report_data["new_titles"]:
			
 
				+            source_header = ""
			
 
				+            if format_type in ("wework", "bark"):
			
 
				+                source_header = f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n"
			
 
				+            elif format_type == "telegram":
			
 
				+                source_header = f"{source_data['source_name']} ({len(source_data['titles'])} 条):\n\n"
			
 
				+            elif format_type == "ntfy":
			
 
				+                source_header = f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n"
			
 
				+            elif format_type == "feishu":
			
 
				+                source_header = f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n"
			
 
				+            elif format_type == "dingtalk":
			
 
				+                source_header = f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n"
			
 
				+            elif format_type == "slack":
			
 
				+                source_header = f"*{source_data['source_name']}* ({len(source_data['titles'])} 条):\n\n"
			
 
				+
			
 
				+            # 构建第一条新增新闻
			
 
				+            first_news_line = ""
			
 
				+            if source_data["titles"]:
			
 
				+                first_title_data = source_data["titles"][0]
			
 
				+                title_data_copy = first_title_data.copy()
			
 
				+                title_data_copy["is_new"] = False
			
 
				+
			
 
				+                if format_type in ("wework", "bark"):
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "wework", title_data_copy, show_source=False
			
 
				+                    )
			
 
				+                elif format_type == "telegram":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "telegram", title_data_copy, show_source=False
			
 
				+                    )
			
 
				+                elif format_type == "feishu":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "feishu", title_data_copy, show_source=False
			
 
				+                    )
			
 
				+                elif format_type == "dingtalk":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "dingtalk", title_data_copy, show_source=False
			
 
				+                    )
			
 
				+                elif format_type == "slack":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "slack", title_data_copy, show_source=False
			
 
				+                    )
			
 
				+                else:
			
 
				+                    formatted_title = f"{title_data_copy['title']}"
			
 
				+
			
 
				+                first_news_line = f"  1. {formatted_title}\n"
			
 
				+
			
 
				+            # 原子性检查：来源标题+第一条新闻
			
 
				+            source_with_first_news = source_header + first_news_line
			
 
				+            test_content = current_batch + source_with_first_news
			
 
				+
			
 
				+            if (
			
 
				+                len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
			
 
				+                >= max_bytes
			
 
				+            ):
			
 
				+                if current_batch_has_content:
			
 
				+                    batches.append(current_batch + base_footer)
			
 
				+                current_batch = base_header + new_header + source_with_first_news
			
 
				+                current_batch_has_content = True
			
 
				+                start_index = 1
			
 
				+            else:
			
 
				+                current_batch = test_content
			
 
				+                current_batch_has_content = True
			
 
				+                start_index = 1
			
 
				+
			
 
				+            # 处理剩余新增新闻
			
 
				+            for j in range(start_index, len(source_data["titles"])):
			
 
				+                title_data = source_data["titles"][j]
			
 
				+                title_data_copy = title_data.copy()
			
 
				+                title_data_copy["is_new"] = False
			
 
				+
			
 
				+                if format_type == "wework":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "wework", title_data_copy, show_source=False
			
 
				+                    )
			
 
				+                elif format_type == "telegram":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "telegram", title_data_copy, show_source=False
			
 
				+                    )
			
 
				+                elif format_type == "feishu":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "feishu", title_data_copy, show_source=False
			
 
				+                    )
			
 
				+                elif format_type == "dingtalk":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "dingtalk", title_data_copy, show_source=False
			
 
				+                    )
			
 
				+                elif format_type == "slack":
			
 
				+                    formatted_title = format_title_for_platform(
			
 
				+                        "slack", title_data_copy, show_source=False
			
 
				+                    )
			
 
				+                else:
			
 
				+                    formatted_title = f"{title_data_copy['title']}"
			
 
				+
			
 
				+                news_line = f"  {j + 1}. {formatted_title}\n"
			
 
				+
			
 
				+                test_content = current_batch + news_line
			
 
				+                if (
			
 
				+                    len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
			
 
				+                    >= max_bytes
			
 
				+                ):
			
 
				+                    if current_batch_has_content:
			
 
				+                        batches.append(current_batch + base_footer)
			
 
				+                    current_batch = base_header + new_header + source_header + news_line
			
 
				+                    current_batch_has_content = True
			
 
				+                else:
			
 
				+                    current_batch = test_content
			
 
				+                    current_batch_has_content = True
			
 
				+
			
 
				+            current_batch += "\n"
			
 
				+
			
 
				+        return current_batch, current_batch_has_content, batches
			
 
				+
			
 
				+    # 根据配置决定处理顺序
			
 
				+    if reverse_content_order:
			
 
				+        # 新增热点在前，热点词汇统计在后
			
 
				+        current_batch, current_batch_has_content, batches = process_new_titles_section(
			
 
				+            current_batch, current_batch_has_content, batches
			
 
				+        )
			
 
				+        current_batch, current_batch_has_content, batches = process_stats_section(
			
 
				+            current_batch, current_batch_has_content, batches
			
 
				+        )
			
 
				+    else:
			
 
				+        # 默认：热点词汇统计在前，新增热点在后
			
 
				+        current_batch, current_batch_has_content, batches = process_stats_section(
			
 
				+            current_batch, current_batch_has_content, batches
			
 
				+        )
			
 
				+        current_batch, current_batch_has_content, batches = process_new_titles_section(
			
 
				+            current_batch, current_batch_has_content, batches
			
 
				+        )
			
 
				+
			
 
				+    if report_data["failed_ids"]:
			
 
				+        failed_header = ""
			
 
				+        if format_type == "wework":
			
 
				+            failed_header = f"\n\n\n\n⚠️ **数据获取失败的平台：**\n\n"
			
 
				+        elif format_type == "telegram":
			
 
				+            failed_header = f"\n\n⚠️ 数据获取失败的平台：\n\n"
			
 
				+        elif format_type == "ntfy":
			
 
				+            failed_header = f"\n\n⚠️ **数据获取失败的平台：**\n\n"
			
 
				+        elif format_type == "feishu":
			
 
				+            failed_header = f"\n{feishu_separator}\n\n⚠️ **数据获取失败的平台：**\n\n"
			
 
				+        elif format_type == "dingtalk":
			
 
				+            failed_header = f"\n---\n\n⚠️ **数据获取失败的平台：**\n\n"
			
 
				+
			
 
				+        test_content = current_batch + failed_header
			
 
				+        if (
			
 
				+            len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
			
 
				+            >= max_bytes
			
 
				+        ):
			
 
				+            if current_batch_has_content:
			
 
				+                batches.append(current_batch + base_footer)
			
 
				+            current_batch = base_header + failed_header
			
 
				+            current_batch_has_content = True
			
 
				+        else:
			
 
				+            current_batch = test_content
			
 
				+            current_batch_has_content = True
			
 
				+
			
 
				+        for i, id_value in enumerate(report_data["failed_ids"], 1):
			
 
				+            if format_type == "feishu":
			
 
				+                failed_line = f"  • <font color='red'>{id_value}</font>\n"
			
 
				+            elif format_type == "dingtalk":
			
 
				+                failed_line = f"  • **{id_value}**\n"
			
 
				+            else:
			
 
				+                failed_line = f"  • {id_value}\n"
			
 
				+
			
 
				+            test_content = current_batch + failed_line
			
 
				+            if (
			
 
				+                len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
			
 
				+                >= max_bytes
			
 
				+            ):
			
 
				+                if current_batch_has_content:
			
 
				+                    batches.append(current_batch + base_footer)
			
 
				+                current_batch = base_header + failed_header + failed_line
			
 
				+                current_batch_has_content = True
			
 
				+            else:
			
 
				+                current_batch = test_content
			
 
				+                current_batch_has_content = True
			
 
				+
			
 
				+    # 完成最后批次
			
 
				+    if current_batch_has_content:
			
 
				+        batches.append(current_batch + base_footer)
			
 
				+
			
 
				+    return batches
			
--- a/trendradar/report/__init__.py
+++ b/trendradar/report/__init__.py
@@ -0,0 +1,40 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+报告生成模块
			
 
				+
			
 
				+提供报告生成和格式化功能，包括：
			
 
				+- HTML 报告生成
			
 
				+- 标题格式化工具
			
 
				+
			
 
				+模块结构：
			
 
				+- helpers: 报告辅助函数（清理、转义、格式化）
			
 
				+- formatter: 平台标题格式化
			
 
				+- html: HTML 报告渲染
			
 
				+- generator: 报告生成器
			
 
				+"""
			
 
				+
			
 
				+from trendradar.report.helpers import (
			
 
				+    clean_title,
			
 
				+    html_escape,
			
 
				+    format_rank_display,
			
 
				+)
			
 
				+from trendradar.report.formatter import format_title_for_platform
			
 
				+from trendradar.report.html import render_html_content
			
 
				+from trendradar.report.generator import (
			
 
				+    prepare_report_data,
			
 
				+    generate_html_report,
			
 
				+)
			
 
				+
			
 
				+__all__ = [
			
 
				+    # 辅助函数
			
 
				+    "clean_title",
			
 
				+    "html_escape",
			
 
				+    "format_rank_display",
			
 
				+    # 格式化函数
			
 
				+    "format_title_for_platform",
			
 
				+    # HTML 渲染
			
 
				+    "render_html_content",
			
 
				+    # 报告生成器
			
 
				+    "prepare_report_data",
			
 
				+    "generate_html_report",
			
 
				+]
			
--- a/trendradar/report/formatter.py
+++ b/trendradar/report/formatter.py
@@ -0,0 +1,223 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+平台标题格式化模块
			
 
				+
			
 
				+提供多平台标题格式化功能
			
 
				+"""
			
 
				+
			
 
				+from typing import Dict
			
 
				+
			
 
				+from trendradar.report.helpers import clean_title, html_escape, format_rank_display
			
 
				+
			
 
				+
			
 
				+def format_title_for_platform(
			
 
				+    platform: str, title_data: Dict, show_source: bool = True
			
 
				+) -> str:
			
 
				+    """统一的标题格式化方法
			
 
				+
			
 
				+    为不同平台生成对应格式的标题字符串。
			
 
				+
			
 
				+    Args:
			
 
				+        platform: 目标平台，支持:
			
 
				+            - "feishu": 飞书
			
 
				+            - "dingtalk": 钉钉
			
 
				+            - "wework": 企业微信
			
 
				+            - "bark": Bark
			
 
				+            - "telegram": Telegram
			
 
				+            - "ntfy": ntfy
			
 
				+            - "slack": Slack
			
 
				+            - "html": HTML 报告
			
 
				+        title_data: 标题数据字典，包含以下字段:
			
 
				+            - title: 标题文本
			
 
				+            - source_name: 来源名称
			
 
				+            - time_display: 时间显示
			
 
				+            - count: 出现次数
			
 
				+            - ranks: 排名列表
			
 
				+            - rank_threshold: 高亮阈值
			
 
				+            - url: PC端链接
			
 
				+            - mobile_url: 移动端链接（优先使用）
			
 
				+            - is_new: 是否为新增标题（可选）
			
 
				+        show_source: 是否显示来源名称
			
 
				+
			
 
				+    Returns:
			
 
				+        格式化后的标题字符串
			
 
				+    """
			
 
				+    rank_display = format_rank_display(
			
 
				+        title_data["ranks"], title_data["rank_threshold"], platform
			
 
				+    )
			
 
				+
			
 
				+    link_url = title_data["mobile_url"] or title_data["url"]
			
 
				+    cleaned_title = clean_title(title_data["title"])
			
 
				+
			
 
				+    if platform == "feishu":
			
 
				+        if link_url:
			
 
				+            formatted_title = f"[{cleaned_title}]({link_url})"
			
 
				+        else:
			
 
				+            formatted_title = cleaned_title
			
 
				+
			
 
				+        title_prefix = "🆕 " if title_data.get("is_new") else ""
			
 
				+
			
 
				+        if show_source:
			
 
				+            result = f"<font color='grey'>[{title_data['source_name']}]</font> {title_prefix}{formatted_title}"
			
 
				+        else:
			
 
				+            result = f"{title_prefix}{formatted_title}"
			
 
				+
			
 
				+        if rank_display:
			
 
				+            result += f" {rank_display}"
			
 
				+        if title_data["time_display"]:
			
 
				+            result += f" <font color='grey'>- {title_data['time_display']}</font>"
			
 
				+        if title_data["count"] > 1:
			
 
				+            result += f" <font color='green'>({title_data['count']}次)</font>"
			
 
				+
			
 
				+        return result
			
 
				+
			
 
				+    elif platform == "dingtalk":
			
 
				+        if link_url:
			
 
				+            formatted_title = f"[{cleaned_title}]({link_url})"
			
 
				+        else:
			
 
				+            formatted_title = cleaned_title
			
 
				+
			
 
				+        title_prefix = "🆕 " if title_data.get("is_new") else ""
			
 
				+
			
 
				+        if show_source:
			
 
				+            result = f"[{title_data['source_name']}] {title_prefix}{formatted_title}"
			
 
				+        else:
			
 
				+            result = f"{title_prefix}{formatted_title}"
			
 
				+
			
 
				+        if rank_display:
			
 
				+            result += f" {rank_display}"
			
 
				+        if title_data["time_display"]:
			
 
				+            result += f" - {title_data['time_display']}"
			
 
				+        if title_data["count"] > 1:
			
 
				+            result += f" ({title_data['count']}次)"
			
 
				+
			
 
				+        return result
			
 
				+
			
 
				+    elif platform in ("wework", "bark"):
			
 
				+        # WeWork 和 Bark 使用 markdown 格式
			
 
				+        if link_url:
			
 
				+            formatted_title = f"[{cleaned_title}]({link_url})"
			
 
				+        else:
			
 
				+            formatted_title = cleaned_title
			
 
				+
			
 
				+        title_prefix = "🆕 " if title_data.get("is_new") else ""
			
 
				+
			
 
				+        if show_source:
			
 
				+            result = f"[{title_data['source_name']}] {title_prefix}{formatted_title}"
			
 
				+        else:
			
 
				+            result = f"{title_prefix}{formatted_title}"
			
 
				+
			
 
				+        if rank_display:
			
 
				+            result += f" {rank_display}"
			
 
				+        if title_data["time_display"]:
			
 
				+            result += f" - {title_data['time_display']}"
			
 
				+        if title_data["count"] > 1:
			
 
				+            result += f" ({title_data['count']}次)"
			
 
				+
			
 
				+        return result
			
 
				+
			
 
				+    elif platform == "telegram":
			
 
				+        if link_url:
			
 
				+            formatted_title = f'<a href="{link_url}">{html_escape(cleaned_title)}</a>'
			
 
				+        else:
			
 
				+            formatted_title = cleaned_title
			
 
				+
			
 
				+        title_prefix = "🆕 " if title_data.get("is_new") else ""
			
 
				+
			
 
				+        if show_source:
			
 
				+            result = f"[{title_data['source_name']}] {title_prefix}{formatted_title}"
			
 
				+        else:
			
 
				+            result = f"{title_prefix}{formatted_title}"
			
 
				+
			
 
				+        if rank_display:
			
 
				+            result += f" {rank_display}"
			
 
				+        if title_data["time_display"]:
			
 
				+            result += f" <code>- {title_data['time_display']}</code>"
			
 
				+        if title_data["count"] > 1:
			
 
				+            result += f" <code>({title_data['count']}次)</code>"
			
 
				+
			
 
				+        return result
			
 
				+
			
 
				+    elif platform == "ntfy":
			
 
				+        if link_url:
			
 
				+            formatted_title = f"[{cleaned_title}]({link_url})"
			
 
				+        else:
			
 
				+            formatted_title = cleaned_title
			
 
				+
			
 
				+        title_prefix = "🆕 " if title_data.get("is_new") else ""
			
 
				+
			
 
				+        if show_source:
			
 
				+            result = f"[{title_data['source_name']}] {title_prefix}{formatted_title}"
			
 
				+        else:
			
 
				+            result = f"{title_prefix}{formatted_title}"
			
 
				+
			
 
				+        if rank_display:
			
 
				+            result += f" {rank_display}"
			
 
				+        if title_data["time_display"]:
			
 
				+            result += f" `- {title_data['time_display']}`"
			
 
				+        if title_data["count"] > 1:
			
 
				+            result += f" `({title_data['count']}次)`"
			
 
				+
			
 
				+        return result
			
 
				+
			
 
				+    elif platform == "slack":
			
 
				+        # Slack 使用 mrkdwn 格式
			
 
				+        if link_url:
			
 
				+            # Slack 链接格式: <url|text>
			
 
				+            formatted_title = f"<{link_url}|{cleaned_title}>"
			
 
				+        else:
			
 
				+            formatted_title = cleaned_title
			
 
				+
			
 
				+        title_prefix = "🆕 " if title_data.get("is_new") else ""
			
 
				+
			
 
				+        if show_source:
			
 
				+            result = f"[{title_data['source_name']}] {title_prefix}{formatted_title}"
			
 
				+        else:
			
 
				+            result = f"{title_prefix}{formatted_title}"
			
 
				+
			
 
				+        # 排名（使用 * 加粗）
			
 
				+        rank_display = format_rank_display(
			
 
				+            title_data["ranks"], title_data["rank_threshold"], "slack"
			
 
				+        )
			
 
				+        if rank_display:
			
 
				+            result += f" {rank_display}"
			
 
				+        if title_data["time_display"]:
			
 
				+            result += f" `- {title_data['time_display']}`"
			
 
				+        if title_data["count"] > 1:
			
 
				+            result += f" `({title_data['count']}次)`"
			
 
				+
			
 
				+        return result
			
 
				+
			
 
				+    elif platform == "html":
			
 
				+        rank_display = format_rank_display(
			
 
				+            title_data["ranks"], title_data["rank_threshold"], "html"
			
 
				+        )
			
 
				+
			
 
				+        link_url = title_data["mobile_url"] or title_data["url"]
			
 
				+
			
 
				+        escaped_title = html_escape(cleaned_title)
			
 
				+        escaped_source_name = html_escape(title_data["source_name"])
			
 
				+
			
 
				+        if link_url:
			
 
				+            escaped_url = html_escape(link_url)
			
 
				+            formatted_title = f'[{escaped_source_name}] <a href="{escaped_url}" target="_blank" class="news-link">{escaped_title}</a>'
			
 
				+        else:
			
 
				+            formatted_title = (
			
 
				+                f'[{escaped_source_name}] <span class="no-link">{escaped_title}</span>'
			
 
				+            )
			
 
				+
			
 
				+        if rank_display:
			
 
				+            formatted_title += f" {rank_display}"
			
 
				+        if title_data["time_display"]:
			
 
				+            escaped_time = html_escape(title_data["time_display"])
			
 
				+            formatted_title += f" <font color='grey'>- {escaped_time}</font>"
			
 
				+        if title_data["count"] > 1:
			
 
				+            formatted_title += f" <font color='green'>({title_data['count']}次)</font>"
			
 
				+
			
 
				+        if title_data.get("is_new"):
			
 
				+            formatted_title = f"<div class='new-title'>🆕 {formatted_title}</div>"
			
 
				+
			
 
				+        return formatted_title
			
 
				+
			
 
				+    else:
			
 
				+        return cleaned_title
			
--- a/trendradar/report/generator.py
+++ b/trendradar/report/generator.py
@@ -0,0 +1,235 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+报告生成模块
			
 
				+
			
 
				+提供报告数据准备和 HTML 生成功能：
			
 
				+- prepare_report_data: 准备报告数据
			
 
				+- generate_html_report: 生成 HTML 报告
			
 
				+"""
			
 
				+
			
 
				+from pathlib import Path
			
 
				+from typing import Dict, List, Optional, Callable
			
 
				+
			
 
				+
			
 
				+def prepare_report_data(
			
 
				+    stats: List[Dict],
			
 
				+    failed_ids: Optional[List] = None,
			
 
				+    new_titles: Optional[Dict] = None,
			
 
				+    id_to_name: Optional[Dict] = None,
			
 
				+    mode: str = "daily",
			
 
				+    rank_threshold: int = 3,
			
 
				+    matches_word_groups_func: Optional[Callable] = None,
			
 
				+    load_frequency_words_func: Optional[Callable] = None,
			
 
				+) -> Dict:
			
 
				+    """
			
 
				+    准备报告数据
			
 
				+
			
 
				+    Args:
			
 
				+        stats: 统计结果列表
			
 
				+        failed_ids: 失败的 ID 列表
			
 
				+        new_titles: 新增标题
			
 
				+        id_to_name: ID 到名称的映射
			
 
				+        mode: 报告模式 (daily/incremental/current)
			
 
				+        rank_threshold: 排名阈值
			
 
				+        matches_word_groups_func: 词组匹配函数
			
 
				+        load_frequency_words_func: 加载频率词函数
			
 
				+
			
 
				+    Returns:
			
 
				+        Dict: 准备好的报告数据
			
 
				+    """
			
 
				+    processed_new_titles = []
			
 
				+
			
 
				+    # 在增量模式下隐藏新增新闻区域
			
 
				+    hide_new_section = mode == "incremental"
			
 
				+
			
 
				+    # 只有在非隐藏模式下才处理新增新闻部分
			
 
				+    if not hide_new_section:
			
 
				+        filtered_new_titles = {}
			
 
				+        if new_titles and id_to_name:
			
 
				+            # 如果提供了匹配函数，使用它过滤
			
 
				+            if matches_word_groups_func and load_frequency_words_func:
			
 
				+                word_groups, filter_words, global_filters = load_frequency_words_func()
			
 
				+                for source_id, titles_data in new_titles.items():
			
 
				+                    filtered_titles = {}
			
 
				+                    for title, title_data in titles_data.items():
			
 
				+                        if matches_word_groups_func(title, word_groups, filter_words, global_filters):
			
 
				+                            filtered_titles[title] = title_data
			
 
				+                    if filtered_titles:
			
 
				+                        filtered_new_titles[source_id] = filtered_titles
			
 
				+            else:
			
 
				+                # 没有匹配函数时，使用全部
			
 
				+                filtered_new_titles = new_titles
			
 
				+
			
 
				+            # 打印过滤后的新增热点数（与推送显示一致）
			
 
				+            original_new_count = sum(len(titles) for titles in new_titles.values()) if new_titles else 0
			
 
				+            filtered_new_count = sum(len(titles) for titles in filtered_new_titles.values()) if filtered_new_titles else 0
			
 
				+            if original_new_count > 0:
			
 
				+                print(f"频率词过滤后：{filtered_new_count} 条新增热点匹配（原始 {original_new_count} 条）")
			
 
				+
			
 
				+        if filtered_new_titles and id_to_name:
			
 
				+            for source_id, titles_data in filtered_new_titles.items():
			
 
				+                source_name = id_to_name.get(source_id, source_id)
			
 
				+                source_titles = []
			
 
				+
			
 
				+                for title, title_data in titles_data.items():
			
 
				+                    url = title_data.get("url", "")
			
 
				+                    mobile_url = title_data.get("mobileUrl", "")
			
 
				+                    ranks = title_data.get("ranks", [])
			
 
				+
			
 
				+                    processed_title = {
			
 
				+                        "title": title,
			
 
				+                        "source_name": source_name,
			
 
				+                        "time_display": "",
			
 
				+                        "count": 1,
			
 
				+                        "ranks": ranks,
			
 
				+                        "rank_threshold": rank_threshold,
			
 
				+                        "url": url,
			
 
				+                        "mobile_url": mobile_url,
			
 
				+                        "is_new": True,
			
 
				+                    }
			
 
				+                    source_titles.append(processed_title)
			
 
				+
			
 
				+                if source_titles:
			
 
				+                    processed_new_titles.append(
			
 
				+                        {
			
 
				+                            "source_id": source_id,
			
 
				+                            "source_name": source_name,
			
 
				+                            "titles": source_titles,
			
 
				+                        }
			
 
				+                    )
			
 
				+
			
 
				+    processed_stats = []
			
 
				+    for stat in stats:
			
 
				+        if stat["count"] <= 0:
			
 
				+            continue
			
 
				+
			
 
				+        processed_titles = []
			
 
				+        for title_data in stat["titles"]:
			
 
				+            processed_title = {
			
 
				+                "title": title_data["title"],
			
 
				+                "source_name": title_data["source_name"],
			
 
				+                "time_display": title_data["time_display"],
			
 
				+                "count": title_data["count"],
			
 
				+                "ranks": title_data["ranks"],
			
 
				+                "rank_threshold": title_data["rank_threshold"],
			
 
				+                "url": title_data.get("url", ""),
			
 
				+                "mobile_url": title_data.get("mobileUrl", ""),
			
 
				+                "is_new": title_data.get("is_new", False),
			
 
				+            }
			
 
				+            processed_titles.append(processed_title)
			
 
				+
			
 
				+        processed_stats.append(
			
 
				+            {
			
 
				+                "word": stat["word"],
			
 
				+                "count": stat["count"],
			
 
				+                "percentage": stat.get("percentage", 0),
			
 
				+                "titles": processed_titles,
			
 
				+            }
			
 
				+        )
			
 
				+
			
 
				+    return {
			
 
				+        "stats": processed_stats,
			
 
				+        "new_titles": processed_new_titles,
			
 
				+        "failed_ids": failed_ids or [],
			
 
				+        "total_new_count": sum(
			
 
				+            len(source["titles"]) for source in processed_new_titles
			
 
				+        ),
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def generate_html_report(
			
 
				+    stats: List[Dict],
			
 
				+    total_titles: int,
			
 
				+    failed_ids: Optional[List] = None,
			
 
				+    new_titles: Optional[Dict] = None,
			
 
				+    id_to_name: Optional[Dict] = None,
			
 
				+    mode: str = "daily",
			
 
				+    is_daily_summary: bool = False,
			
 
				+    update_info: Optional[Dict] = None,
			
 
				+    rank_threshold: int = 3,
			
 
				+    output_dir: str = "output",
			
 
				+    date_folder: str = "",
			
 
				+    time_filename: str = "",
			
 
				+    render_html_func: Optional[Callable] = None,
			
 
				+    matches_word_groups_func: Optional[Callable] = None,
			
 
				+    load_frequency_words_func: Optional[Callable] = None,
			
 
				+    enable_index_copy: bool = True,
			
 
				+) -> str:
			
 
				+    """
			
 
				+    生成 HTML 报告
			
 
				+
			
 
				+    Args:
			
 
				+        stats: 统计结果列表
			
 
				+        total_titles: 总标题数
			
 
				+        failed_ids: 失败的 ID 列表
			
 
				+        new_titles: 新增标题
			
 
				+        id_to_name: ID 到名称的映射
			
 
				+        mode: 报告模式 (daily/incremental/current)
			
 
				+        is_daily_summary: 是否是每日汇总
			
 
				+        update_info: 更新信息
			
 
				+        rank_threshold: 排名阈值
			
 
				+        output_dir: 输出目录
			
 
				+        date_folder: 日期文件夹名称
			
 
				+        time_filename: 时间文件名
			
 
				+        render_html_func: HTML 渲染函数
			
 
				+        matches_word_groups_func: 词组匹配函数
			
 
				+        load_frequency_words_func: 加载频率词函数
			
 
				+        enable_index_copy: 是否复制到 index.html
			
 
				+
			
 
				+    Returns:
			
 
				+        str: 生成的 HTML 文件路径
			
 
				+    """
			
 
				+    if is_daily_summary:
			
 
				+        if mode == "current":
			
 
				+            filename = "当前榜单汇总.html"
			
 
				+        elif mode == "incremental":
			
 
				+            filename = "当日增量.html"
			
 
				+        else:
			
 
				+            filename = "当日汇总.html"
			
 
				+    else:
			
 
				+        filename = f"{time_filename}.html"
			
 
				+
			
 
				+    # 构建输出路径
			
 
				+    output_path = Path(output_dir) / date_folder / "html"
			
 
				+    output_path.mkdir(parents=True, exist_ok=True)
			
 
				+    file_path = str(output_path / filename)
			
 
				+
			
 
				+    # 准备报告数据
			
 
				+    report_data = prepare_report_data(
			
 
				+        stats,
			
 
				+        failed_ids,
			
 
				+        new_titles,
			
 
				+        id_to_name,
			
 
				+        mode,
			
 
				+        rank_threshold,
			
 
				+        matches_word_groups_func,
			
 
				+        load_frequency_words_func,
			
 
				+    )
			
 
				+
			
 
				+    # 渲染 HTML 内容
			
 
				+    if render_html_func:
			
 
				+        html_content = render_html_func(
			
 
				+            report_data, total_titles, is_daily_summary, mode, update_info
			
 
				+        )
			
 
				+    else:
			
 
				+        # 默认简单 HTML
			
 
				+        html_content = f"<html><body><h1>Report</h1><pre>{report_data}</pre></body></html>"
			
 
				+
			
 
				+    # 写入文件
			
 
				+    with open(file_path, "w", encoding="utf-8") as f:
			
 
				+        f.write(html_content)
			
 
				+
			
 
				+    # 如果是每日汇总且启用 index 复制
			
 
				+    if is_daily_summary and enable_index_copy:
			
 
				+        # 生成到根目录（供 GitHub Pages 访问）
			
 
				+        root_index_path = Path("index.html")
			
 
				+        with open(root_index_path, "w", encoding="utf-8") as f:
			
 
				+            f.write(html_content)
			
 
				+
			
 
				+        # 同时生成到 output 目录（供 Docker Volume 挂载访问）
			
 
				+        output_index_path = Path(output_dir) / "index.html"
			
 
				+        Path(output_dir).mkdir(parents=True, exist_ok=True)
			
 
				+        with open(output_index_path, "w", encoding="utf-8") as f:
			
 
				+            f.write(html_content)
			
 
				+
			
 
				+    return file_path
			
--- a/trendradar/report/helpers.py
+++ b/trendradar/report/helpers.py
@@ -0,0 +1,125 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+报告辅助函数模块
			
 
				+
			
 
				+提供报告生成相关的通用辅助函数
			
 
				+"""
			
 
				+
			
 
				+import re
			
 
				+from typing import List
			
 
				+
			
 
				+
			
 
				+def clean_title(title: str) -> str:
			
 
				+    """清理标题中的特殊字符
			
 
				+
			
 
				+    清理规则：
			
 
				+    - 将换行符(\n, \r)替换为空格
			
 
				+    - 将多个连续空白字符合并为单个空格
			
 
				+    - 去除首尾空白
			
 
				+
			
 
				+    Args:
			
 
				+        title: 原始标题字符串
			
 
				+
			
 
				+    Returns:
			
 
				+        清理后的标题字符串
			
 
				+    """
			
 
				+    if not isinstance(title, str):
			
 
				+        title = str(title)
			
 
				+    cleaned_title = title.replace("\n", " ").replace("\r", " ")
			
 
				+    cleaned_title = re.sub(r"\s+", " ", cleaned_title)
			
 
				+    cleaned_title = cleaned_title.strip()
			
 
				+    return cleaned_title
			
 
				+
			
 
				+
			
 
				+def html_escape(text: str) -> str:
			
 
				+    """HTML特殊字符转义
			
 
				+
			
 
				+    转义规则（按顺序）：
			
 
				+    - & → &amp;
			
 
				+    - < → &lt;
			
 
				+    - > → &gt;
			
 
				+    - " → &quot;
			
 
				+    - ' → &#x27;
			
 
				+
			
 
				+    Args:
			
 
				+        text: 原始文本
			
 
				+
			
 
				+    Returns:
			
 
				+        转义后的文本
			
 
				+    """
			
 
				+    if not isinstance(text, str):
			
 
				+        text = str(text)
			
 
				+
			
 
				+    return (
			
 
				+        text.replace("&", "&amp;")
			
 
				+        .replace("<", "&lt;")
			
 
				+        .replace(">", "&gt;")
			
 
				+        .replace('"', "&quot;")
			
 
				+        .replace("'", "&#x27;")
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+def format_rank_display(ranks: List[int], rank_threshold: int, format_type: str) -> str:
			
 
				+    """格式化排名显示
			
 
				+
			
 
				+    根据不同平台类型生成对应格式的排名字符串。
			
 
				+    当最小排名小于等于阈值时，使用高亮格式。
			
 
				+
			
 
				+    Args:
			
 
				+        ranks: 排名列表（可能包含重复值）
			
 
				+        rank_threshold: 高亮阈值，小于等于此值的排名会高亮显示
			
 
				+        format_type: 平台类型，支持:
			
 
				+            - "html": HTML格式
			
 
				+            - "feishu": 飞书格式
			
 
				+            - "dingtalk": 钉钉格式
			
 
				+            - "wework": 企业微信格式
			
 
				+            - "telegram": Telegram格式
			
 
				+            - "slack": Slack格式
			
 
				+            - 其他: 默认markdown格式
			
 
				+
			
 
				+    Returns:
			
 
				+        格式化后的排名字符串，如 "[1]" 或 "[1 - 5]"
			
 
				+        如果排名列表为空，返回空字符串
			
 
				+    """
			
 
				+    if not ranks:
			
 
				+        return ""
			
 
				+
			
 
				+    unique_ranks = sorted(set(ranks))
			
 
				+    min_rank = unique_ranks[0]
			
 
				+    max_rank = unique_ranks[-1]
			
 
				+
			
 
				+    # 根据平台类型选择高亮格式
			
 
				+    if format_type == "html":
			
 
				+        highlight_start = "<font color='red'><strong>"
			
 
				+        highlight_end = "</strong></font>"
			
 
				+    elif format_type == "feishu":
			
 
				+        highlight_start = "<font color='red'>**"
			
 
				+        highlight_end = "**</font>"
			
 
				+    elif format_type == "dingtalk":
			
 
				+        highlight_start = "**"
			
 
				+        highlight_end = "**"
			
 
				+    elif format_type == "wework":
			
 
				+        highlight_start = "**"
			
 
				+        highlight_end = "**"
			
 
				+    elif format_type == "telegram":
			
 
				+        highlight_start = "<b>"
			
 
				+        highlight_end = "</b>"
			
 
				+    elif format_type == "slack":
			
 
				+        highlight_start = "*"
			
 
				+        highlight_end = "*"
			
 
				+    else:
			
 
				+        # 默认 markdown 格式
			
 
				+        highlight_start = "**"
			
 
				+        highlight_end = "**"
			
 
				+
			
 
				+    # 生成排名显示
			
 
				+    if min_rank <= rank_threshold:
			
 
				+        if min_rank == max_rank:
			
 
				+            return f"{highlight_start}[{min_rank}]{highlight_end}"
			
 
				+        else:
			
 
				+            return f"{highlight_start}[{min_rank} - {max_rank}]{highlight_end}"
			
 
				+    else:
			
 
				+        if min_rank == max_rank:
			
 
				+            return f"[{min_rank}]"
			
 
				+        else:
			
 
				+            return f"[{min_rank} - {max_rank}]"
			
--- a/trendradar/report/html.py
+++ b/trendradar/report/html.py
@@ -0,0 +1,1050 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+HTML 报告渲染模块
			
 
				+
			
 
				+提供 HTML 格式的热点新闻报告生成功能
			
 
				+"""
			
 
				+
			
 
				+from datetime import datetime
			
 
				+from typing import Dict, Optional, Callable
			
 
				+
			
 
				+from trendradar.report.helpers import html_escape
			
 
				+
			
 
				+
			
 
				+def render_html_content(
			
 
				+    report_data: Dict,
			
 
				+    total_titles: int,
			
 
				+    is_daily_summary: bool = False,
			
 
				+    mode: str = "daily",
			
 
				+    update_info: Optional[Dict] = None,
			
 
				+    *,
			
 
				+    reverse_content_order: bool = False,
			
 
				+    get_time_func: Optional[Callable[[], datetime]] = None,
			
 
				+) -> str:
			
 
				+    """渲染HTML内容
			
 
				+
			
 
				+    Args:
			
 
				+        report_data: 报告数据字典，包含 stats, new_titles, failed_ids, total_new_count
			
 
				+        total_titles: 新闻总数
			
 
				+        is_daily_summary: 是否为当日汇总
			
 
				+        mode: 报告模式 ("daily", "current", "incremental")
			
 
				+        update_info: 更新信息（可选）
			
 
				+        reverse_content_order: 是否反转内容顺序（新增热点在前）
			
 
				+        get_time_func: 获取当前时间的函数（可选，默认使用 datetime.now）
			
 
				+
			
 
				+    Returns:
			
 
				+        渲染后的 HTML 字符串
			
 
				+    """
			
 
				+    html = """
			
 
				+    <!DOCTYPE html>
			
 
				+    <html>
			
 
				+    <head>
			
 
				+        <meta charset="UTF-8">
			
 
				+        <meta name="viewport" content="width=device-width, initial-scale=1.0">
			
 
				+        <title>热点新闻分析</title>
			
 
				+        <script src="https://cdnjs.cloudflare.com/ajax/libs/html2canvas/1.4.1/html2canvas.min.js" integrity="sha512-BNaRQnYJYiPSqHHDb58B0yaPfCu+Wgds8Gp/gU33kqBtgNS4tSPHuGibyoeqMV/TJlSKda6FXzoEyYGjTe+vXA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
			
 
				+        <style>
			
 
				+            * { box-sizing: border-box; }
			
 
				+            body {
			
 
				+                font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', system-ui, sans-serif;
			
 
				+                margin: 0;
			
 
				+                padding: 16px;
			
 
				+                background: #fafafa;
			
 
				+                color: #333;
			
 
				+                line-height: 1.5;
			
 
				+            }
			
 
				+
			
 
				+            .container {
			
 
				+                max-width: 600px;
			
 
				+                margin: 0 auto;
			
 
				+                background: white;
			
 
				+                border-radius: 12px;
			
 
				+                overflow: hidden;
			
 
				+                box-shadow: 0 2px 16px rgba(0,0,0,0.06);
			
 
				+            }
			
 
				+
			
 
				+            .header {
			
 
				+                background: linear-gradient(135deg, #4f46e5 0%, #7c3aed 100%);
			
 
				+                color: white;
			
 
				+                padding: 32px 24px;
			
 
				+                text-align: center;
			
 
				+                position: relative;
			
 
				+            }
			
 
				+
			
 
				+            .save-buttons {
			
 
				+                position: absolute;
			
 
				+                top: 16px;
			
 
				+                right: 16px;
			
 
				+                display: flex;
			
 
				+                gap: 8px;
			
 
				+            }
			
 
				+
			
 
				+            .save-btn {
			
 
				+                background: rgba(255, 255, 255, 0.2);
			
 
				+                border: 1px solid rgba(255, 255, 255, 0.3);
			
 
				+                color: white;
			
 
				+                padding: 8px 16px;
			
 
				+                border-radius: 6px;
			
 
				+                cursor: pointer;
			
 
				+                font-size: 13px;
			
 
				+                font-weight: 500;
			
 
				+                transition: all 0.2s ease;
			
 
				+                backdrop-filter: blur(10px);
			
 
				+                white-space: nowrap;
			
 
				+            }
			
 
				+
			
 
				+            .save-btn:hover {
			
 
				+                background: rgba(255, 255, 255, 0.3);
			
 
				+                border-color: rgba(255, 255, 255, 0.5);
			
 
				+                transform: translateY(-1px);
			
 
				+            }
			
 
				+
			
 
				+            .save-btn:active {
			
 
				+                transform: translateY(0);
			
 
				+            }
			
 
				+
			
 
				+            .save-btn:disabled {
			
 
				+                opacity: 0.6;
			
 
				+                cursor: not-allowed;
			
 
				+            }
			
 
				+
			
 
				+            .header-title {
			
 
				+                font-size: 22px;
			
 
				+                font-weight: 700;
			
 
				+                margin: 0 0 20px 0;
			
 
				+            }
			
 
				+
			
 
				+            .header-info {
			
 
				+                display: grid;
			
 
				+                grid-template-columns: 1fr 1fr;
			
 
				+                gap: 16px;
			
 
				+                font-size: 14px;
			
 
				+                opacity: 0.95;
			
 
				+            }
			
 
				+
			
 
				+            .info-item {
			
 
				+                text-align: center;
			
 
				+            }
			
 
				+
			
 
				+            .info-label {
			
 
				+                display: block;
			
 
				+                font-size: 12px;
			
 
				+                opacity: 0.8;
			
 
				+                margin-bottom: 4px;
			
 
				+            }
			
 
				+
			
 
				+            .info-value {
			
 
				+                font-weight: 600;
			
 
				+                font-size: 16px;
			
 
				+            }
			
 
				+
			
 
				+            .content {
			
 
				+                padding: 24px;
			
 
				+            }
			
 
				+
			
 
				+            .word-group {
			
 
				+                margin-bottom: 40px;
			
 
				+            }
			
 
				+
			
 
				+            .word-group:first-child {
			
 
				+                margin-top: 0;
			
 
				+            }
			
 
				+
			
 
				+            .word-header {
			
 
				+                display: flex;
			
 
				+                align-items: center;
			
 
				+                justify-content: space-between;
			
 
				+                margin-bottom: 20px;
			
 
				+                padding-bottom: 8px;
			
 
				+                border-bottom: 1px solid #f0f0f0;
			
 
				+            }
			
 
				+
			
 
				+            .word-info {
			
 
				+                display: flex;
			
 
				+                align-items: center;
			
 
				+                gap: 12px;
			
 
				+            }
			
 
				+
			
 
				+            .word-name {
			
 
				+                font-size: 17px;
			
 
				+                font-weight: 600;
			
 
				+                color: #1a1a1a;
			
 
				+            }
			
 
				+
			
 
				+            .word-count {
			
 
				+                color: #666;
			
 
				+                font-size: 13px;
			
 
				+                font-weight: 500;
			
 
				+            }
			
 
				+
			
 
				+            .word-count.hot { color: #dc2626; font-weight: 600; }
			
 
				+            .word-count.warm { color: #ea580c; font-weight: 600; }
			
 
				+
			
 
				+            .word-index {
			
 
				+                color: #999;
			
 
				+                font-size: 12px;
			
 
				+            }
			
 
				+
			
 
				+            .news-item {
			
 
				+                margin-bottom: 20px;
			
 
				+                padding: 16px 0;
			
 
				+                border-bottom: 1px solid #f5f5f5;
			
 
				+                position: relative;
			
 
				+                display: flex;
			
 
				+                gap: 12px;
			
 
				+                align-items: center;
			
 
				+            }
			
 
				+
			
 
				+            .news-item:last-child {
			
 
				+                border-bottom: none;
			
 
				+            }
			
 
				+
			
 
				+            .news-item.new::after {
			
 
				+                content: "NEW";
			
 
				+                position: absolute;
			
 
				+                top: 12px;
			
 
				+                right: 0;
			
 
				+                background: #fbbf24;
			
 
				+                color: #92400e;
			
 
				+                font-size: 9px;
			
 
				+                font-weight: 700;
			
 
				+                padding: 3px 6px;
			
 
				+                border-radius: 4px;
			
 
				+                letter-spacing: 0.5px;
			
 
				+            }
			
 
				+
			
 
				+            .news-number {
			
 
				+                color: #999;
			
 
				+                font-size: 13px;
			
 
				+                font-weight: 600;
			
 
				+                min-width: 20px;
			
 
				+                text-align: center;
			
 
				+                flex-shrink: 0;
			
 
				+                background: #f8f9fa;
			
 
				+                border-radius: 50%;
			
 
				+                width: 24px;
			
 
				+                height: 24px;
			
 
				+                display: flex;
			
 
				+                align-items: center;
			
 
				+                justify-content: center;
			
 
				+                align-self: flex-start;
			
 
				+                margin-top: 8px;
			
 
				+            }
			
 
				+
			
 
				+            .news-content {
			
 
				+                flex: 1;
			
 
				+                min-width: 0;
			
 
				+                padding-right: 40px;
			
 
				+            }
			
 
				+
			
 
				+            .news-item.new .news-content {
			
 
				+                padding-right: 50px;
			
 
				+            }
			
 
				+
			
 
				+            .news-header {
			
 
				+                display: flex;
			
 
				+                align-items: center;
			
 
				+                gap: 8px;
			
 
				+                margin-bottom: 8px;
			
 
				+                flex-wrap: wrap;
			
 
				+            }
			
 
				+
			
 
				+            .source-name {
			
 
				+                color: #666;
			
 
				+                font-size: 12px;
			
 
				+                font-weight: 500;
			
 
				+            }
			
 
				+
			
 
				+            .rank-num {
			
 
				+                color: #fff;
			
 
				+                background: #6b7280;
			
 
				+                font-size: 10px;
			
 
				+                font-weight: 700;
			
 
				+                padding: 2px 6px;
			
 
				+                border-radius: 10px;
			
 
				+                min-width: 18px;
			
 
				+                text-align: center;
			
 
				+            }
			
 
				+
			
 
				+            .rank-num.top { background: #dc2626; }
			
 
				+            .rank-num.high { background: #ea580c; }
			
 
				+
			
 
				+            .time-info {
			
 
				+                color: #999;
			
 
				+                font-size: 11px;
			
 
				+            }
			
 
				+
			
 
				+            .count-info {
			
 
				+                color: #059669;
			
 
				+                font-size: 11px;
			
 
				+                font-weight: 500;
			
 
				+            }
			
 
				+
			
 
				+            .news-title {
			
 
				+                font-size: 15px;
			
 
				+                line-height: 1.4;
			
 
				+                color: #1a1a1a;
			
 
				+                margin: 0;
			
 
				+            }
			
 
				+
			
 
				+            .news-link {
			
 
				+                color: #2563eb;
			
 
				+                text-decoration: none;
			
 
				+            }
			
 
				+
			
 
				+            .news-link:hover {
			
 
				+                text-decoration: underline;
			
 
				+            }
			
 
				+
			
 
				+            .news-link:visited {
			
 
				+                color: #7c3aed;
			
 
				+            }
			
 
				+
			
 
				+            .new-section {
			
 
				+                margin-top: 40px;
			
 
				+                padding-top: 24px;
			
 
				+                border-top: 2px solid #f0f0f0;
			
 
				+            }
			
 
				+
			
 
				+            .new-section-title {
			
 
				+                color: #1a1a1a;
			
 
				+                font-size: 16px;
			
 
				+                font-weight: 600;
			
 
				+                margin: 0 0 20px 0;
			
 
				+            }
			
 
				+
			
 
				+            .new-source-group {
			
 
				+                margin-bottom: 24px;
			
 
				+            }
			
 
				+
			
 
				+            .new-source-title {
			
 
				+                color: #666;
			
 
				+                font-size: 13px;
			
 
				+                font-weight: 500;
			
 
				+                margin: 0 0 12px 0;
			
 
				+                padding-bottom: 6px;
			
 
				+                border-bottom: 1px solid #f5f5f5;
			
 
				+            }
			
 
				+
			
 
				+            .new-item {
			
 
				+                display: flex;
			
 
				+                align-items: center;
			
 
				+                gap: 12px;
			
 
				+                padding: 8px 0;
			
 
				+                border-bottom: 1px solid #f9f9f9;
			
 
				+            }
			
 
				+
			
 
				+            .new-item:last-child {
			
 
				+                border-bottom: none;
			
 
				+            }
			
 
				+
			
 
				+            .new-item-number {
			
 
				+                color: #999;
			
 
				+                font-size: 12px;
			
 
				+                font-weight: 600;
			
 
				+                min-width: 18px;
			
 
				+                text-align: center;
			
 
				+                flex-shrink: 0;
			
 
				+                background: #f8f9fa;
			
 
				+                border-radius: 50%;
			
 
				+                width: 20px;
			
 
				+                height: 20px;
			
 
				+                display: flex;
			
 
				+                align-items: center;
			
 
				+                justify-content: center;
			
 
				+            }
			
 
				+
			
 
				+            .new-item-rank {
			
 
				+                color: #fff;
			
 
				+                background: #6b7280;
			
 
				+                font-size: 10px;
			
 
				+                font-weight: 700;
			
 
				+                padding: 3px 6px;
			
 
				+                border-radius: 8px;
			
 
				+                min-width: 20px;
			
 
				+                text-align: center;
			
 
				+                flex-shrink: 0;
			
 
				+            }
			
 
				+
			
 
				+            .new-item-rank.top { background: #dc2626; }
			
 
				+            .new-item-rank.high { background: #ea580c; }
			
 
				+
			
 
				+            .new-item-content {
			
 
				+                flex: 1;
			
 
				+                min-width: 0;
			
 
				+            }
			
 
				+
			
 
				+            .new-item-title {
			
 
				+                font-size: 14px;
			
 
				+                line-height: 1.4;
			
 
				+                color: #1a1a1a;
			
 
				+                margin: 0;
			
 
				+            }
			
 
				+
			
 
				+            .error-section {
			
 
				+                background: #fef2f2;
			
 
				+                border: 1px solid #fecaca;
			
 
				+                border-radius: 8px;
			
 
				+                padding: 16px;
			
 
				+                margin-bottom: 24px;
			
 
				+            }
			
 
				+
			
 
				+            .error-title {
			
 
				+                color: #dc2626;
			
 
				+                font-size: 14px;
			
 
				+                font-weight: 600;
			
 
				+                margin: 0 0 8px 0;
			
 
				+            }
			
 
				+
			
 
				+            .error-list {
			
 
				+                list-style: none;
			
 
				+                padding: 0;
			
 
				+                margin: 0;
			
 
				+            }
			
 
				+
			
 
				+            .error-item {
			
 
				+                color: #991b1b;
			
 
				+                font-size: 13px;
			
 
				+                padding: 2px 0;
			
 
				+                font-family: 'SF Mono', Consolas, monospace;
			
 
				+            }
			
 
				+
			
 
				+            .footer {
			
 
				+                margin-top: 32px;
			
 
				+                padding: 20px 24px;
			
 
				+                background: #f8f9fa;
			
 
				+                border-top: 1px solid #e5e7eb;
			
 
				+                text-align: center;
			
 
				+            }
			
 
				+
			
 
				+            .footer-content {
			
 
				+                font-size: 13px;
			
 
				+                color: #6b7280;
			
 
				+                line-height: 1.6;
			
 
				+            }
			
 
				+
			
 
				+            .footer-link {
			
 
				+                color: #4f46e5;
			
 
				+                text-decoration: none;
			
 
				+                font-weight: 500;
			
 
				+                transition: color 0.2s ease;
			
 
				+            }
			
 
				+
			
 
				+            .footer-link:hover {
			
 
				+                color: #7c3aed;
			
 
				+                text-decoration: underline;
			
 
				+            }
			
 
				+
			
 
				+            .project-name {
			
 
				+                font-weight: 600;
			
 
				+                color: #374151;
			
 
				+            }
			
 
				+
			
 
				+            @media (max-width: 480px) {
			
 
				+                body { padding: 12px; }
			
 
				+                .header { padding: 24px 20px; }
			
 
				+                .content { padding: 20px; }
			
 
				+                .footer { padding: 16px 20px; }
			
 
				+                .header-info { grid-template-columns: 1fr; gap: 12px; }
			
 
				+                .news-header { gap: 6px; }
			
 
				+                .news-content { padding-right: 45px; }
			
 
				+                .news-item { gap: 8px; }
			
 
				+                .new-item { gap: 8px; }
			
 
				+                .news-number { width: 20px; height: 20px; font-size: 12px; }
			
 
				+                .save-buttons {
			
 
				+                    position: static;
			
 
				+                    margin-bottom: 16px;
			
 
				+                    display: flex;
			
 
				+                    gap: 8px;
			
 
				+                    justify-content: center;
			
 
				+                    flex-direction: column;
			
 
				+                    width: 100%;
			
 
				+                }
			
 
				+                .save-btn {
			
 
				+                    width: 100%;
			
 
				+                }
			
 
				+            }
			
 
				+        </style>
			
 
				+    </head>
			
 
				+    <body>
			
 
				+        <div class="container">
			
 
				+            <div class="header">
			
 
				+                <div class="save-buttons">
			
 
				+                    <button class="save-btn" onclick="saveAsImage()">保存为图片</button>
			
 
				+                    <button class="save-btn" onclick="saveAsMultipleImages()">分段保存</button>
			
 
				+                </div>
			
 
				+                <div class="header-title">热点新闻分析</div>
			
 
				+                <div class="header-info">
			
 
				+                    <div class="info-item">
			
 
				+                        <span class="info-label">报告类型</span>
			
 
				+                        <span class="info-value">"""
			
 
				+
			
 
				+    # 处理报告类型显示
			
 
				+    if is_daily_summary:
			
 
				+        if mode == "current":
			
 
				+            html += "当前榜单"
			
 
				+        elif mode == "incremental":
			
 
				+            html += "增量模式"
			
 
				+        else:
			
 
				+            html += "当日汇总"
			
 
				+    else:
			
 
				+        html += "实时分析"
			
 
				+
			
 
				+    html += """</span>
			
 
				+                    </div>
			
 
				+                    <div class="info-item">
			
 
				+                        <span class="info-label">新闻总数</span>
			
 
				+                        <span class="info-value">"""
			
 
				+
			
 
				+    html += f"{total_titles} 条"
			
 
				+
			
 
				+    # 计算筛选后的热点新闻数量
			
 
				+    hot_news_count = sum(len(stat["titles"]) for stat in report_data["stats"])
			
 
				+
			
 
				+    html += """</span>
			
 
				+                    </div>
			
 
				+                    <div class="info-item">
			
 
				+                        <span class="info-label">热点新闻</span>
			
 
				+                        <span class="info-value">"""
			
 
				+
			
 
				+    html += f"{hot_news_count} 条"
			
 
				+
			
 
				+    html += """</span>
			
 
				+                    </div>
			
 
				+                    <div class="info-item">
			
 
				+                        <span class="info-label">生成时间</span>
			
 
				+                        <span class="info-value">"""
			
 
				+
			
 
				+    # 使用提供的时间函数或默认 datetime.now
			
 
				+    if get_time_func:
			
 
				+        now = get_time_func()
			
 
				+    else:
			
 
				+        now = datetime.now()
			
 
				+    html += now.strftime("%m-%d %H:%M")
			
 
				+
			
 
				+    html += """</span>
			
 
				+                    </div>
			
 
				+                </div>
			
 
				+            </div>
			
 
				+
			
 
				+            <div class="content">"""
			
 
				+
			
 
				+    # 处理失败ID错误信息
			
 
				+    if report_data["failed_ids"]:
			
 
				+        html += """
			
 
				+                <div class="error-section">
			
 
				+                    <div class="error-title">⚠️ 请求失败的平台</div>
			
 
				+                    <ul class="error-list">"""
			
 
				+        for id_value in report_data["failed_ids"]:
			
 
				+            html += f'<li class="error-item">{html_escape(id_value)}</li>'
			
 
				+        html += """
			
 
				+                    </ul>
			
 
				+                </div>"""
			
 
				+
			
 
				+    # 生成热点词汇统计部分的HTML
			
 
				+    stats_html = ""
			
 
				+    if report_data["stats"]:
			
 
				+        total_count = len(report_data["stats"])
			
 
				+
			
 
				+        for i, stat in enumerate(report_data["stats"], 1):
			
 
				+            count = stat["count"]
			
 
				+
			
 
				+            # 确定热度等级
			
 
				+            if count >= 10:
			
 
				+                count_class = "hot"
			
 
				+            elif count >= 5:
			
 
				+                count_class = "warm"
			
 
				+            else:
			
 
				+                count_class = ""
			
 
				+
			
 
				+            escaped_word = html_escape(stat["word"])
			
 
				+
			
 
				+            stats_html += f"""
			
 
				+                <div class="word-group">
			
 
				+                    <div class="word-header">
			
 
				+                        <div class="word-info">
			
 
				+                            <div class="word-name">{escaped_word}</div>
			
 
				+                            <div class="word-count {count_class}">{count} 条</div>
			
 
				+                        </div>
			
 
				+                        <div class="word-index">{i}/{total_count}</div>
			
 
				+                    </div>"""
			
 
				+
			
 
				+            # 处理每个词组下的新闻标题，给每条新闻标上序号
			
 
				+            for j, title_data in enumerate(stat["titles"], 1):
			
 
				+                is_new = title_data.get("is_new", False)
			
 
				+                new_class = "new" if is_new else ""
			
 
				+
			
 
				+                stats_html += f"""
			
 
				+                    <div class="news-item {new_class}">
			
 
				+                        <div class="news-number">{j}</div>
			
 
				+                        <div class="news-content">
			
 
				+                            <div class="news-header">
			
 
				+                                <span class="source-name">{html_escape(title_data["source_name"])}</span>"""
			
 
				+
			
 
				+                # 处理排名显示
			
 
				+                ranks = title_data.get("ranks", [])
			
 
				+                if ranks:
			
 
				+                    min_rank = min(ranks)
			
 
				+                    max_rank = max(ranks)
			
 
				+                    rank_threshold = title_data.get("rank_threshold", 10)
			
 
				+
			
 
				+                    # 确定排名等级
			
 
				+                    if min_rank <= 3:
			
 
				+                        rank_class = "top"
			
 
				+                    elif min_rank <= rank_threshold:
			
 
				+                        rank_class = "high"
			
 
				+                    else:
			
 
				+                        rank_class = ""
			
 
				+
			
 
				+                    if min_rank == max_rank:
			
 
				+                        rank_text = str(min_rank)
			
 
				+                    else:
			
 
				+                        rank_text = f"{min_rank}-{max_rank}"
			
 
				+
			
 
				+                    stats_html += f'<span class="rank-num {rank_class}">{rank_text}</span>'
			
 
				+
			
 
				+                # 处理时间显示
			
 
				+                time_display = title_data.get("time_display", "")
			
 
				+                if time_display:
			
 
				+                    # 简化时间显示格式，将波浪线替换为~
			
 
				+                    simplified_time = (
			
 
				+                        time_display.replace(" ~ ", "~")
			
 
				+                        .replace("[", "")
			
 
				+                        .replace("]", "")
			
 
				+                    )
			
 
				+                    stats_html += (
			
 
				+                        f'<span class="time-info">{html_escape(simplified_time)}</span>'
			
 
				+                    )
			
 
				+
			
 
				+                # 处理出现次数
			
 
				+                count_info = title_data.get("count", 1)
			
 
				+                if count_info > 1:
			
 
				+                    stats_html += f'<span class="count-info">{count_info}次</span>'
			
 
				+
			
 
				+                stats_html += """
			
 
				+                            </div>
			
 
				+                            <div class="news-title">"""
			
 
				+
			
 
				+                # 处理标题和链接
			
 
				+                escaped_title = html_escape(title_data["title"])
			
 
				+                link_url = title_data.get("mobile_url") or title_data.get("url", "")
			
 
				+
			
 
				+                if link_url:
			
 
				+                    escaped_url = html_escape(link_url)
			
 
				+                    stats_html += f'<a href="{escaped_url}" target="_blank" class="news-link">{escaped_title}</a>'
			
 
				+                else:
			
 
				+                    stats_html += escaped_title
			
 
				+
			
 
				+                stats_html += """
			
 
				+                            </div>
			
 
				+                        </div>
			
 
				+                    </div>"""
			
 
				+
			
 
				+            stats_html += """
			
 
				+                </div>"""
			
 
				+
			
 
				+    # 生成新增新闻区域的HTML
			
 
				+    new_titles_html = ""
			
 
				+    if report_data["new_titles"]:
			
 
				+        new_titles_html += f"""
			
 
				+                <div class="new-section">
			
 
				+                    <div class="new-section-title">本次新增热点 (共 {report_data['total_new_count']} 条)</div>"""
			
 
				+
			
 
				+        for source_data in report_data["new_titles"]:
			
 
				+            escaped_source = html_escape(source_data["source_name"])
			
 
				+            titles_count = len(source_data["titles"])
			
 
				+
			
 
				+            new_titles_html += f"""
			
 
				+                    <div class="new-source-group">
			
 
				+                        <div class="new-source-title">{escaped_source} · {titles_count}条</div>"""
			
 
				+
			
 
				+            # 为新增新闻也添加序号
			
 
				+            for idx, title_data in enumerate(source_data["titles"], 1):
			
 
				+                ranks = title_data.get("ranks", [])
			
 
				+
			
 
				+                # 处理新增新闻的排名显示
			
 
				+                rank_class = ""
			
 
				+                if ranks:
			
 
				+                    min_rank = min(ranks)
			
 
				+                    if min_rank <= 3:
			
 
				+                        rank_class = "top"
			
 
				+                    elif min_rank <= title_data.get("rank_threshold", 10):
			
 
				+                        rank_class = "high"
			
 
				+
			
 
				+                    if len(ranks) == 1:
			
 
				+                        rank_text = str(ranks[0])
			
 
				+                    else:
			
 
				+                        rank_text = f"{min(ranks)}-{max(ranks)}"
			
 
				+                else:
			
 
				+                    rank_text = "?"
			
 
				+
			
 
				+                new_titles_html += f"""
			
 
				+                        <div class="new-item">
			
 
				+                            <div class="new-item-number">{idx}</div>
			
 
				+                            <div class="new-item-rank {rank_class}">{rank_text}</div>
			
 
				+                            <div class="new-item-content">
			
 
				+                                <div class="new-item-title">"""
			
 
				+
			
 
				+                # 处理新增新闻的链接
			
 
				+                escaped_title = html_escape(title_data["title"])
			
 
				+                link_url = title_data.get("mobile_url") or title_data.get("url", "")
			
 
				+
			
 
				+                if link_url:
			
 
				+                    escaped_url = html_escape(link_url)
			
 
				+                    new_titles_html += f'<a href="{escaped_url}" target="_blank" class="news-link">{escaped_title}</a>'
			
 
				+                else:
			
 
				+                    new_titles_html += escaped_title
			
 
				+
			
 
				+                new_titles_html += """
			
 
				+                                </div>
			
 
				+                            </div>
			
 
				+                        </div>"""
			
 
				+
			
 
				+            new_titles_html += """
			
 
				+                    </div>"""
			
 
				+
			
 
				+        new_titles_html += """
			
 
				+                </div>"""
			
 
				+
			
 
				+    # 根据配置决定内容顺序
			
 
				+    if reverse_content_order:
			
 
				+        # 新增热点在前，热点词汇统计在后
			
 
				+        html += new_titles_html + stats_html
			
 
				+    else:
			
 
				+        # 默认：热点词汇统计在前，新增热点在后
			
 
				+        html += stats_html + new_titles_html
			
 
				+
			
 
				+    html += """
			
 
				+            </div>
			
 
				+
			
 
				+            <div class="footer">
			
 
				+                <div class="footer-content">
			
 
				+                    由 <span class="project-name">TrendRadar</span> 生成 ·
			
 
				+                    <a href="https://github.com/sansan0/TrendRadar" target="_blank" class="footer-link">
			
 
				+                        GitHub 开源项目
			
 
				+                    </a>"""
			
 
				+
			
 
				+    if update_info:
			
 
				+        html += f"""
			
 
				+                    <br>
			
 
				+                    <span style="color: #ea580c; font-weight: 500;">
			
 
				+                        发现新版本 {update_info['remote_version']}，当前版本 {update_info['current_version']}
			
 
				+                    </span>"""
			
 
				+
			
 
				+    html += """
			
 
				+                </div>
			
 
				+            </div>
			
 
				+        </div>
			
 
				+
			
 
				+        <script>
			
 
				+            async function saveAsImage() {
			
 
				+                const button = event.target;
			
 
				+                const originalText = button.textContent;
			
 
				+
			
 
				+                try {
			
 
				+                    button.textContent = '生成中...';
			
 
				+                    button.disabled = true;
			
 
				+                    window.scrollTo(0, 0);
			
 
				+
			
 
				+                    // 等待页面稳定
			
 
				+                    await new Promise(resolve => setTimeout(resolve, 200));
			
 
				+
			
 
				+                    // 截图前隐藏按钮
			
 
				+                    const buttons = document.querySelector('.save-buttons');
			
 
				+                    buttons.style.visibility = 'hidden';
			
 
				+
			
 
				+                    // 再次等待确保按钮完全隐藏
			
 
				+                    await new Promise(resolve => setTimeout(resolve, 100));
			
 
				+
			
 
				+                    const container = document.querySelector('.container');
			
 
				+
			
 
				+                    const canvas = await html2canvas(container, {
			
 
				+                        backgroundColor: '#ffffff',
			
 
				+                        scale: 1.5,
			
 
				+                        useCORS: true,
			
 
				+                        allowTaint: false,
			
 
				+                        imageTimeout: 10000,
			
 
				+                        removeContainer: false,
			
 
				+                        foreignObjectRendering: false,
			
 
				+                        logging: false,
			
 
				+                        width: container.offsetWidth,
			
 
				+                        height: container.offsetHeight,
			
 
				+                        x: 0,
			
 
				+                        y: 0,
			
 
				+                        scrollX: 0,
			
 
				+                        scrollY: 0,
			
 
				+                        windowWidth: window.innerWidth,
			
 
				+                        windowHeight: window.innerHeight
			
 
				+                    });
			
 
				+
			
 
				+                    buttons.style.visibility = 'visible';
			
 
				+
			
 
				+                    const link = document.createElement('a');
			
 
				+                    const now = new Date();
			
 
				+                    const filename = `TrendRadar_热点新闻分析_${now.getFullYear()}${String(now.getMonth() + 1).padStart(2, '0')}${String(now.getDate()).padStart(2, '0')}_${String(now.getHours()).padStart(2, '0')}${String(now.getMinutes()).padStart(2, '0')}.png`;
			
 
				+
			
 
				+                    link.download = filename;
			
 
				+                    link.href = canvas.toDataURL('image/png', 1.0);
			
 
				+
			
 
				+                    // 触发下载
			
 
				+                    document.body.appendChild(link);
			
 
				+                    link.click();
			
 
				+                    document.body.removeChild(link);
			
 
				+
			
 
				+                    button.textContent = '保存成功!';
			
 
				+                    setTimeout(() => {
			
 
				+                        button.textContent = originalText;
			
 
				+                        button.disabled = false;
			
 
				+                    }, 2000);
			
 
				+
			
 
				+                } catch (error) {
			
 
				+                    const buttons = document.querySelector('.save-buttons');
			
 
				+                    buttons.style.visibility = 'visible';
			
 
				+                    button.textContent = '保存失败';
			
 
				+                    setTimeout(() => {
			
 
				+                        button.textContent = originalText;
			
 
				+                        button.disabled = false;
			
 
				+                    }, 2000);
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				+            async function saveAsMultipleImages() {
			
 
				+                const button = event.target;
			
 
				+                const originalText = button.textContent;
			
 
				+                const container = document.querySelector('.container');
			
 
				+                const scale = 1.5;
			
 
				+                const maxHeight = 5000 / scale;
			
 
				+
			
 
				+                try {
			
 
				+                    button.textContent = '分析中...';
			
 
				+                    button.disabled = true;
			
 
				+
			
 
				+                    // 获取所有可能的分割元素
			
 
				+                    const newsItems = Array.from(container.querySelectorAll('.news-item'));
			
 
				+                    const wordGroups = Array.from(container.querySelectorAll('.word-group'));
			
 
				+                    const newSection = container.querySelector('.new-section');
			
 
				+                    const errorSection = container.querySelector('.error-section');
			
 
				+                    const header = container.querySelector('.header');
			
 
				+                    const footer = container.querySelector('.footer');
			
 
				+
			
 
				+                    // 计算元素位置和高度
			
 
				+                    const containerRect = container.getBoundingClientRect();
			
 
				+                    const elements = [];
			
 
				+
			
 
				+                    // 添加header作为必须包含的元素
			
 
				+                    elements.push({
			
 
				+                        type: 'header',
			
 
				+                        element: header,
			
 
				+                        top: 0,
			
 
				+                        bottom: header.offsetHeight,
			
 
				+                        height: header.offsetHeight
			
 
				+                    });
			
 
				+
			
 
				+                    // 添加错误信息（如果存在）
			
 
				+                    if (errorSection) {
			
 
				+                        const rect = errorSection.getBoundingClientRect();
			
 
				+                        elements.push({
			
 
				+                            type: 'error',
			
 
				+                            element: errorSection,
			
 
				+                            top: rect.top - containerRect.top,
			
 
				+                            bottom: rect.bottom - containerRect.top,
			
 
				+                            height: rect.height
			
 
				+                        });
			
 
				+                    }
			
 
				+
			
 
				+                    // 按word-group分组处理news-item
			
 
				+                    wordGroups.forEach(group => {
			
 
				+                        const groupRect = group.getBoundingClientRect();
			
 
				+                        const groupNewsItems = group.querySelectorAll('.news-item');
			
 
				+
			
 
				+                        // 添加word-group的header部分
			
 
				+                        const wordHeader = group.querySelector('.word-header');
			
 
				+                        if (wordHeader) {
			
 
				+                            const headerRect = wordHeader.getBoundingClientRect();
			
 
				+                            elements.push({
			
 
				+                                type: 'word-header',
			
 
				+                                element: wordHeader,
			
 
				+                                parent: group,
			
 
				+                                top: groupRect.top - containerRect.top,
			
 
				+                                bottom: headerRect.bottom - containerRect.top,
			
 
				+                                height: headerRect.height
			
 
				+                            });
			
 
				+                        }
			
 
				+
			
 
				+                        // 添加每个news-item
			
 
				+                        groupNewsItems.forEach(item => {
			
 
				+                            const rect = item.getBoundingClientRect();
			
 
				+                            elements.push({
			
 
				+                                type: 'news-item',
			
 
				+                                element: item,
			
 
				+                                parent: group,
			
 
				+                                top: rect.top - containerRect.top,
			
 
				+                                bottom: rect.bottom - containerRect.top,
			
 
				+                                height: rect.height
			
 
				+                            });
			
 
				+                        });
			
 
				+                    });
			
 
				+
			
 
				+                    // 添加新增新闻部分
			
 
				+                    if (newSection) {
			
 
				+                        const rect = newSection.getBoundingClientRect();
			
 
				+                        elements.push({
			
 
				+                            type: 'new-section',
			
 
				+                            element: newSection,
			
 
				+                            top: rect.top - containerRect.top,
			
 
				+                            bottom: rect.bottom - containerRect.top,
			
 
				+                            height: rect.height
			
 
				+                        });
			
 
				+                    }
			
 
				+
			
 
				+                    // 添加footer
			
 
				+                    const footerRect = footer.getBoundingClientRect();
			
 
				+                    elements.push({
			
 
				+                        type: 'footer',
			
 
				+                        element: footer,
			
 
				+                        top: footerRect.top - containerRect.top,
			
 
				+                        bottom: footerRect.bottom - containerRect.top,
			
 
				+                        height: footer.offsetHeight
			
 
				+                    });
			
 
				+
			
 
				+                    // 计算分割点
			
 
				+                    const segments = [];
			
 
				+                    let currentSegment = { start: 0, end: 0, height: 0, includeHeader: true };
			
 
				+                    let headerHeight = header.offsetHeight;
			
 
				+                    currentSegment.height = headerHeight;
			
 
				+
			
 
				+                    for (let i = 1; i < elements.length; i++) {
			
 
				+                        const element = elements[i];
			
 
				+                        const potentialHeight = element.bottom - currentSegment.start;
			
 
				+
			
 
				+                        // 检查是否需要创建新分段
			
 
				+                        if (potentialHeight > maxHeight && currentSegment.height > headerHeight) {
			
 
				+                            // 在前一个元素结束处分割
			
 
				+                            currentSegment.end = elements[i - 1].bottom;
			
 
				+                            segments.push(currentSegment);
			
 
				+
			
 
				+                            // 开始新分段
			
 
				+                            currentSegment = {
			
 
				+                                start: currentSegment.end,
			
 
				+                                end: 0,
			
 
				+                                height: element.bottom - currentSegment.end,
			
 
				+                                includeHeader: false
			
 
				+                            };
			
 
				+                        } else {
			
 
				+                            currentSegment.height = potentialHeight;
			
 
				+                            currentSegment.end = element.bottom;
			
 
				+                        }
			
 
				+                    }
			
 
				+
			
 
				+                    // 添加最后一个分段
			
 
				+                    if (currentSegment.height > 0) {
			
 
				+                        currentSegment.end = container.offsetHeight;
			
 
				+                        segments.push(currentSegment);
			
 
				+                    }
			
 
				+
			
 
				+                    button.textContent = `生成中 (0/${segments.length})...`;
			
 
				+
			
 
				+                    // 隐藏保存按钮
			
 
				+                    const buttons = document.querySelector('.save-buttons');
			
 
				+                    buttons.style.visibility = 'hidden';
			
 
				+
			
 
				+                    // 为每个分段生成图片
			
 
				+                    const images = [];
			
 
				+                    for (let i = 0; i < segments.length; i++) {
			
 
				+                        const segment = segments[i];
			
 
				+                        button.textContent = `生成中 (${i + 1}/${segments.length})...`;
			
 
				+
			
 
				+                        // 创建临时容器用于截图
			
 
				+                        const tempContainer = document.createElement('div');
			
 
				+                        tempContainer.style.cssText = `
			
 
				+                            position: absolute;
			
 
				+                            left: -9999px;
			
 
				+                            top: 0;
			
 
				+                            width: ${container.offsetWidth}px;
			
 
				+                            background: white;
			
 
				+                        `;
			
 
				+                        tempContainer.className = 'container';
			
 
				+
			
 
				+                        // 克隆容器内容
			
 
				+                        const clonedContainer = container.cloneNode(true);
			
 
				+
			
 
				+                        // 移除克隆内容中的保存按钮
			
 
				+                        const clonedButtons = clonedContainer.querySelector('.save-buttons');
			
 
				+                        if (clonedButtons) {
			
 
				+                            clonedButtons.style.display = 'none';
			
 
				+                        }
			
 
				+
			
 
				+                        tempContainer.appendChild(clonedContainer);
			
 
				+                        document.body.appendChild(tempContainer);
			
 
				+
			
 
				+                        // 等待DOM更新
			
 
				+                        await new Promise(resolve => setTimeout(resolve, 100));
			
 
				+
			
 
				+                        // 使用html2canvas截取特定区域
			
 
				+                        const canvas = await html2canvas(clonedContainer, {
			
 
				+                            backgroundColor: '#ffffff',
			
 
				+                            scale: scale,
			
 
				+                            useCORS: true,
			
 
				+                            allowTaint: false,
			
 
				+                            imageTimeout: 10000,
			
 
				+                            logging: false,
			
 
				+                            width: container.offsetWidth,
			
 
				+                            height: segment.end - segment.start,
			
 
				+                            x: 0,
			
 
				+                            y: segment.start,
			
 
				+                            windowWidth: window.innerWidth,
			
 
				+                            windowHeight: window.innerHeight
			
 
				+                        });
			
 
				+
			
 
				+                        images.push(canvas.toDataURL('image/png', 1.0));
			
 
				+
			
 
				+                        // 清理临时容器
			
 
				+                        document.body.removeChild(tempContainer);
			
 
				+                    }
			
 
				+
			
 
				+                    // 恢复按钮显示
			
 
				+                    buttons.style.visibility = 'visible';
			
 
				+
			
 
				+                    // 下载所有图片
			
 
				+                    const now = new Date();
			
 
				+                    const baseFilename = `TrendRadar_热点新闻分析_${now.getFullYear()}${String(now.getMonth() + 1).padStart(2, '0')}${String(now.getDate()).padStart(2, '0')}_${String(now.getHours()).padStart(2, '0')}${String(now.getMinutes()).padStart(2, '0')}`;
			
 
				+
			
 
				+                    for (let i = 0; i < images.length; i++) {
			
 
				+                        const link = document.createElement('a');
			
 
				+                        link.download = `${baseFilename}_part${i + 1}.png`;
			
 
				+                        link.href = images[i];
			
 
				+                        document.body.appendChild(link);
			
 
				+                        link.click();
			
 
				+                        document.body.removeChild(link);
			
 
				+
			
 
				+                        // 延迟一下避免浏览器阻止多个下载
			
 
				+                        await new Promise(resolve => setTimeout(resolve, 100));
			
 
				+                    }
			
 
				+
			
 
				+                    button.textContent = `已保存 ${segments.length} 张图片!`;
			
 
				+                    setTimeout(() => {
			
 
				+                        button.textContent = originalText;
			
 
				+                        button.disabled = false;
			
 
				+                    }, 2000);
			
 
				+
			
 
				+                } catch (error) {
			
 
				+                    console.error('分段保存失败:', error);
			
 
				+                    const buttons = document.querySelector('.save-buttons');
			
 
				+                    buttons.style.visibility = 'visible';
			
 
				+                    button.textContent = '保存失败';
			
 
				+                    setTimeout(() => {
			
 
				+                        button.textContent = originalText;
			
 
				+                        button.disabled = false;
			
 
				+                    }, 2000);
			
 
				+                }
			
 
				+            }
			
 
				+
			
 
				+            document.addEventListener('DOMContentLoaded', function() {
			
 
				+                window.scrollTo(0, 0);
			
 
				+            });
			
 
				+        </script>
			
 
				+    </body>
			
 
				+    </html>
			
 
				+    """
			
 
				+
			
 
				+    return html
			
--- a/trendradar/storage/__init__.py
+++ b/trendradar/storage/__init__.py
@@ -0,0 +1,44 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+存储模块 - 支持多种存储后端
			
 
				+
			
 
				+支持的存储后端:
			
 
				+- local: 本地 SQLite + TXT/HTML 文件
			
 
				+- remote: 远程云存储（S3 兼容协议：R2/OSS/COS/S3 等）
			
 
				+- auto: 根据环境自动选择（GitHub Actions 用 remote，其他用 local）
			
 
				+"""
			
 
				+
			
 
				+from trendradar.storage.base import (
			
 
				+    StorageBackend,
			
 
				+    NewsItem,
			
 
				+    NewsData,
			
 
				+    convert_crawl_results_to_news_data,
			
 
				+    convert_news_data_to_results,
			
 
				+)
			
 
				+from trendradar.storage.local import LocalStorageBackend
			
 
				+from trendradar.storage.manager import StorageManager, get_storage_manager
			
 
				+
			
 
				+# Optional import of the remote backend (original note: requires boto3); on ImportError, RemoteStorageBackend is bound to None and HAS_REMOTE to False.
			
 
				+try:
			
 
				+    from trendradar.storage.remote import RemoteStorageBackend
			
 
				+    HAS_REMOTE = True
			
 
				+except ImportError:
			
 
				+    RemoteStorageBackend = None
			
 
				+    HAS_REMOTE = False
			
 
				+
			
 
				+__all__ = [
			
 
				+    # Base classes
			
 
				+    "StorageBackend",
			
 
				+    "NewsItem",
			
 
				+    "NewsData",
			
 
				+    # Conversion functions
			
 
				+    "convert_crawl_results_to_news_data",
			
 
				+    "convert_news_data_to_results",
			
 
				+    # Backend implementations (RemoteStorageBackend is None when its import failed; check HAS_REMOTE first)
			
 
				+    "LocalStorageBackend",
			
 
				+    "RemoteStorageBackend",
			
 
				+    "HAS_REMOTE",
			
 
				+    # Manager
			
 
				+    "StorageManager",
			
 
				+    "get_storage_manager",
			
 
				+]
			
--- a/trendradar/storage/base.py
+++ b/trendradar/storage/base.py
@@ -0,0 +1,457 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+存储后端抽象基类和数据模型
			
 
				+
			
 
				+定义统一的存储接口，所有存储后端都需要实现这些方法
			
 
				+"""
			
 
				+
			
 
				+from abc import ABC, abstractmethod
			
 
				+from dataclasses import dataclass, field
			
 
				+from datetime import datetime
			
 
				+from typing import Dict, List, Optional, Any
			
 
				+import json
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class NewsItem:
			
 
				+    """新闻条目数据模型"""
			
 
				+
			
 
				+    title: str                          # 新闻标题
			
 
				+    source_id: str                      # 来源平台ID（如 toutiao, baidu）
			
 
				+    source_name: str = ""               # 来源平台名称（运行时使用，数据库不存储）
			
 
				+    rank: int = 0                       # 排名
			
 
				+    url: str = ""                       # 链接 URL
			
 
				+    mobile_url: str = ""                # 移动端 URL
			
 
				+    crawl_time: str = ""                # 抓取时间（HH:MM 格式）
			
 
				+
			
 
				+    # 统计信息（用于分析）
			
 
				+    ranks: List[int] = field(default_factory=list)  # 历史排名列表
			
 
				+    first_time: str = ""                # 首次出现时间
			
 
				+    last_time: str = ""                 # 最后出现时间
			
 
				+    count: int = 1                      # 出现次数
			
 
				+
			
 
				+    def to_dict(self) -> Dict[str, Any]:
			
 
				+        """转换为字典"""
			
 
				+        return {
			
 
				+            "title": self.title,
			
 
				+            "source_id": self.source_id,
			
 
				+            "source_name": self.source_name,
			
 
				+            "rank": self.rank,
			
 
				+            "url": self.url,
			
 
				+            "mobile_url": self.mobile_url,
			
 
				+            "crawl_time": self.crawl_time,
			
 
				+            "ranks": self.ranks,
			
 
				+            "first_time": self.first_time,
			
 
				+            "last_time": self.last_time,
			
 
				+            "count": self.count,
			
 
				+        }
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_dict(cls, data: Dict[str, Any]) -> "NewsItem":
			
 
				+        """从字典创建"""
			
 
				+        return cls(
			
 
				+            title=data.get("title", ""),
			
 
				+            source_id=data.get("source_id", ""),
			
 
				+            source_name=data.get("source_name", ""),
			
 
				+            rank=data.get("rank", 0),
			
 
				+            url=data.get("url", ""),
			
 
				+            mobile_url=data.get("mobile_url", ""),
			
 
				+            crawl_time=data.get("crawl_time", ""),
			
 
				+            ranks=data.get("ranks", []),
			
 
				+            first_time=data.get("first_time", ""),
			
 
				+            last_time=data.get("last_time", ""),
			
 
				+            count=data.get("count", 1),
			
 
				+        )
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class NewsData:
			
 
				+    """
			
 
				+    新闻数据集合
			
 
				+
			
 
				+    结构:
			
 
				+    - date: 日期（YYYY-MM-DD）
			
 
				+    - crawl_time: 抓取时间（HH时MM分）
			
 
				+    - items: 按来源ID分组的新闻条目
			
 
				+    - id_to_name: 来源ID到名称的映射
			
 
				+    - failed_ids: 失败的来源ID列表
			
 
				+    """
			
 
				+
			
 
				+    date: str                                   # 日期
			
 
				+    crawl_time: str                             # 抓取时间
			
 
				+    items: Dict[str, List[NewsItem]]            # 按来源分组的新闻
			
 
				+    id_to_name: Dict[str, str] = field(default_factory=dict)   # ID到名称映射
			
 
				+    failed_ids: List[str] = field(default_factory=list)        # 失败的ID
			
 
				+
			
 
				+    def to_dict(self) -> Dict[str, Any]:
			
 
				+        """转换为字典"""
			
 
				+        items_dict = {}
			
 
				+        for source_id, news_list in self.items.items():
			
 
				+            items_dict[source_id] = [item.to_dict() for item in news_list]
			
 
				+
			
 
				+        return {
			
 
				+            "date": self.date,
			
 
				+            "crawl_time": self.crawl_time,
			
 
				+            "items": items_dict,
			
 
				+            "id_to_name": self.id_to_name,
			
 
				+            "failed_ids": self.failed_ids,
			
 
				+        }
			
 
				+
			
 
				+    @classmethod
			
 
				+    def from_dict(cls, data: Dict[str, Any]) -> "NewsData":
			
 
				+        """从字典创建"""
			
 
				+        items = {}
			
 
				+        items_data = data.get("items", {})
			
 
				+        for source_id, news_list in items_data.items():
			
 
				+            items[source_id] = [NewsItem.from_dict(item) for item in news_list]
			
 
				+
			
 
				+        return cls(
			
 
				+            date=data.get("date", ""),
			
 
				+            crawl_time=data.get("crawl_time", ""),
			
 
				+            items=items,
			
 
				+            id_to_name=data.get("id_to_name", {}),
			
 
				+            failed_ids=data.get("failed_ids", []),
			
 
				+        )
			
 
				+
			
 
				+    def get_total_count(self) -> int:
			
 
				+        """获取新闻总数"""
			
 
				+        return sum(len(news_list) for news_list in self.items.values())
			
 
				+
			
 
				+    def merge_with(self, other: "NewsData") -> "NewsData":
			
 
				+        """
			
 
				+        合并另一个 NewsData 到当前数据
			
 
				+
			
 
				+        合并规则:
			
 
				+        - 相同 source_id + title 的新闻合并排名历史
			
 
				+        - 更新 last_time 和 count
			
 
				+        - 保留较早的 first_time
			
 
				+        """
			
 
				+        merged_items = {}
			
 
				+
			
 
				+        # 复制当前数据
			
 
				+        for source_id, news_list in self.items.items():
			
 
				+            merged_items[source_id] = {item.title: item for item in news_list}
			
 
				+
			
 
				+        # 合并其他数据
			
 
				+        for source_id, news_list in other.items.items():
			
 
				+            if source_id not in merged_items:
			
 
				+                merged_items[source_id] = {}
			
 
				+
			
 
				+            for item in news_list:
			
 
				+                if item.title in merged_items[source_id]:
			
 
				+                    # 合并已存在的新闻
			
 
				+                    existing = merged_items[source_id][item.title]
			
 
				+
			
 
				+                    # 合并排名
			
 
				+                    existing_ranks = set(existing.ranks) if existing.ranks else set()
			
 
				+                    new_ranks = set(item.ranks) if item.ranks else set()
			
 
				+                    merged_ranks = sorted(existing_ranks | new_ranks)
			
 
				+                    existing.ranks = merged_ranks
			
 
				+
			
 
				+                    # 更新时间
			
 
				+                    if item.first_time and (not existing.first_time or item.first_time < existing.first_time):
			
 
				+                        existing.first_time = item.first_time
			
 
				+                    if item.last_time and (not existing.last_time or item.last_time > existing.last_time):
			
 
				+                        existing.last_time = item.last_time
			
 
				+
			
 
				+                    # 更新计数
			
 
				+                    existing.count += 1
			
 
				+
			
 
				+                    # 保留URL（如果原来没有）
			
 
				+                    if not existing.url and item.url:
			
 
				+                        existing.url = item.url
			
 
				+                    if not existing.mobile_url and item.mobile_url:
			
 
				+                        existing.mobile_url = item.mobile_url
			
 
				+                else:
			
 
				+                    # 添加新新闻
			
 
				+                    merged_items[source_id][item.title] = item
			
 
				+
			
 
				+        # 转换回列表格式
			
 
				+        final_items = {}
			
 
				+        for source_id, items_dict in merged_items.items():
			
 
				+            final_items[source_id] = list(items_dict.values())
			
 
				+
			
 
				+        # 合并 id_to_name
			
 
				+        merged_id_to_name = {**self.id_to_name, **other.id_to_name}
			
 
				+
			
 
				+        # 合并 failed_ids（去重）
			
 
				+        merged_failed_ids = list(set(self.failed_ids + other.failed_ids))
			
 
				+
			
 
				+        return NewsData(
			
 
				+            date=self.date or other.date,
			
 
				+            crawl_time=other.crawl_time,  # 使用较新的抓取时间
			
 
				+            items=final_items,
			
 
				+            id_to_name=merged_id_to_name,
			
 
				+            failed_ids=merged_failed_ids,
			
 
				+        )
			
 
				+
			
 
				+
			
 
				+class StorageBackend(ABC):
			
 
				+    """
			
 
				+    存储后端抽象基类
			
 
				+
			
 
				+    所有存储后端都需要实现这些方法，以支持:
			
 
				+    - 保存新闻数据
			
 
				+    - 读取当天所有数据
			
 
				+    - 检测新增新闻
			
 
				+    - 生成报告文件（TXT/HTML）
			
 
				+    """
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    def save_news_data(self, data: NewsData) -> bool:
			
 
				+        """
			
 
				+        保存新闻数据
			
 
				+
			
 
				+        Args:
			
 
				+            data: 新闻数据
			
 
				+
			
 
				+        Returns:
			
 
				+            是否保存成功
			
 
				+        """
			
 
				+        pass
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    def get_today_all_data(self, date: Optional[str] = None) -> Optional[NewsData]:
			
 
				+        """
			
 
				+        获取指定日期的所有新闻数据
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期字符串（YYYY-MM-DD），默认为今天
			
 
				+
			
 
				+        Returns:
			
 
				+            合并后的新闻数据，如果没有数据返回 None
			
 
				+        """
			
 
				+        pass
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    def get_latest_crawl_data(self, date: Optional[str] = None) -> Optional[NewsData]:
			
 
				+        """
			
 
				+        获取最新一次抓取的数据
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期字符串，默认为今天
			
 
				+
			
 
				+        Returns:
			
 
				+            最新抓取的新闻数据
			
 
				+        """
			
 
				+        pass
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    def detect_new_titles(self, current_data: NewsData) -> Dict[str, Dict]:
			
 
				+        """
			
 
				+        检测新增的标题
			
 
				+
			
 
				+        Args:
			
 
				+            current_data: 当前抓取的数据
			
 
				+
			
 
				+        Returns:
			
 
				+            新增的标题数据，格式: {source_id: {title: title_data}}
			
 
				+        """
			
 
				+        pass
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    def save_txt_snapshot(self, data: NewsData) -> Optional[str]:
			
 
				+        """
			
 
				+        保存 TXT 快照（可选功能，本地环境可用）
			
 
				+
			
 
				+        Args:
			
 
				+            data: 新闻数据
			
 
				+
			
 
				+        Returns:
			
 
				+            保存的文件路径，如果不支持返回 None
			
 
				+        """
			
 
				+        pass
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    def save_html_report(self, html_content: str, filename: str, is_summary: bool = False) -> Optional[str]:
			
 
				+        """
			
 
				+        保存 HTML 报告
			
 
				+
			
 
				+        Args:
			
 
				+            html_content: HTML 内容
			
 
				+            filename: 文件名
			
 
				+            is_summary: 是否为汇总报告
			
 
				+
			
 
				+        Returns:
			
 
				+            保存的文件路径
			
 
				+        """
			
 
				+        pass
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    def is_first_crawl_today(self, date: Optional[str] = None) -> bool:
			
 
				+        """
			
 
				+        检查是否是当天第一次抓取
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期字符串，默认为今天
			
 
				+
			
 
				+        Returns:
			
 
				+            是否是第一次抓取
			
 
				+        """
			
 
				+        pass
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    def cleanup(self) -> None:
			
 
				+        """
			
 
				+        清理资源（如临时文件、数据库连接等）
			
 
				+        """
			
 
				+        pass
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    def cleanup_old_data(self, retention_days: int) -> int:
			
 
				+        """
			
 
				+        清理过期数据
			
 
				+
			
 
				+        Args:
			
 
				+            retention_days: 保留天数（0 表示不清理）
			
 
				+
			
 
				+        Returns:
			
 
				+            删除的日期目录数量
			
 
				+        """
			
 
				+        pass
			
 
				+
			
 
				+    @property
			
 
				+    @abstractmethod
			
 
				+    def backend_name(self) -> str:
			
 
				+        """
			
 
				+        存储后端名称
			
 
				+        """
			
 
				+        pass
			
 
				+
			
 
				+    @property
			
 
				+    @abstractmethod
			
 
				+    def supports_txt(self) -> bool:
			
 
				+        """
			
 
				+        是否支持生成 TXT 快照
			
 
				+        """
			
 
				+        pass
			
 
				+
			
 
				+    # === 推送记录相关方法 ===
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    def has_pushed_today(self, date: Optional[str] = None) -> bool:
			
 
				+        """
			
 
				+        检查指定日期是否已推送过
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期字符串（YYYY-MM-DD），默认为今天
			
 
				+
			
 
				+        Returns:
			
 
				+            是否已推送
			
 
				+        """
			
 
				+        pass
			
 
				+
			
 
				+    @abstractmethod
			
 
				+    def record_push(self, report_type: str, date: Optional[str] = None) -> bool:
			
 
				+        """
			
 
				+        记录推送
			
 
				+
			
 
				+        Args:
			
 
				+            report_type: 报告类型
			
 
				+            date: 日期字符串（YYYY-MM-DD），默认为今天
			
 
				+
			
 
				+        Returns:
			
 
				+            是否记录成功
			
 
				+        """
			
 
				+        pass
			
 
				+
			
 
				+
			
 
				+def convert_crawl_results_to_news_data(
			
 
				+    results: Dict[str, Dict],
			
 
				+    id_to_name: Dict[str, str],
			
 
				+    failed_ids: List[str],
			
 
				+    crawl_time: str,
			
 
				+    crawl_date: str,
			
 
				+) -> NewsData:
			
 
				+    """
			
 
				+    将爬虫结果转换为 NewsData 格式
			
 
				+
			
 
				+    Args:
			
 
				+        results: 爬虫返回的结果 {source_id: {title: {ranks: [], url: "", mobileUrl: ""}}}
			
 
				+        id_to_name: 来源ID到名称的映射
			
 
				+        failed_ids: 失败的来源ID
			
 
				+        crawl_time: 抓取时间（HH:MM）
			
 
				+        crawl_date: 抓取日期（YYYY-MM-DD）
			
 
				+
			
 
				+    Returns:
			
 
				+        NewsData 对象
			
 
				+    """
			
 
				+    items = {}
			
 
				+
			
 
				+    for source_id, titles_data in results.items():
			
 
				+        source_name = id_to_name.get(source_id, source_id)
			
 
				+        news_list = []
			
 
				+
			
 
				+        for title, data in titles_data.items():
			
 
				+            if isinstance(data, dict):
			
 
				+                ranks = data.get("ranks", [])
			
 
				+                url = data.get("url", "")
			
 
				+                mobile_url = data.get("mobileUrl", "")
			
 
				+            else:
			
 
				+                # 兼容旧格式
			
 
				+                ranks = data if isinstance(data, list) else []
			
 
				+                url = ""
			
 
				+                mobile_url = ""
			
 
				+
			
 
				+            rank = ranks[0] if ranks else 99
			
 
				+
			
 
				+            news_item = NewsItem(
			
 
				+                title=title,
			
 
				+                source_id=source_id,
			
 
				+                source_name=source_name,
			
 
				+                rank=rank,
			
 
				+                url=url,
			
 
				+                mobile_url=mobile_url,
			
 
				+                crawl_time=crawl_time,
			
 
				+                ranks=ranks,
			
 
				+                first_time=crawl_time,
			
 
				+                last_time=crawl_time,
			
 
				+                count=1,
			
 
				+            )
			
 
				+            news_list.append(news_item)
			
 
				+
			
 
				+        items[source_id] = news_list
			
 
				+
			
 
				+    return NewsData(
			
 
				+        date=crawl_date,
			
 
				+        crawl_time=crawl_time,
			
 
				+        items=items,
			
 
				+        id_to_name=id_to_name,
			
 
				+        failed_ids=failed_ids,
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+def convert_news_data_to_results(data: NewsData) -> tuple:
			
 
				+    """
			
 
				+    将 NewsData 转换回原有的 results 格式（用于兼容现有代码）
			
 
				+
			
 
				+    Args:
			
 
				+        data: NewsData 对象
			
 
				+
			
 
				+    Returns:
			
 
				+        (results, id_to_name, title_info) 元组
			
 
				+    """
			
 
				+    results = {}
			
 
				+    title_info = {}
			
 
				+
			
 
				+    for source_id, news_list in data.items.items():
			
 
				+        results[source_id] = {}
			
 
				+        title_info[source_id] = {}
			
 
				+
			
 
				+        for item in news_list:
			
 
				+            results[source_id][item.title] = {
			
 
				+                "ranks": item.ranks,
			
 
				+                "url": item.url,
			
 
				+                "mobileUrl": item.mobile_url,
			
 
				+            }
			
 
				+
			
 
				+            title_info[source_id][item.title] = {
			
 
				+                "first_time": item.first_time,
			
 
				+                "last_time": item.last_time,
			
 
				+                "count": item.count,
			
 
				+                "ranks": item.ranks,
			
 
				+                "url": item.url,
			
 
				+                "mobileUrl": item.mobile_url,
			
 
				+            }
			
 
				+
			
 
				+    return results, data.id_to_name, title_info
			
--- a/trendradar/storage/local.py
+++ b/trendradar/storage/local.py
@@ -0,0 +1,869 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+本地存储后端 - SQLite + TXT/HTML
			
 
				+
			
 
				+使用 SQLite 作为主存储，支持可选的 TXT 快照和 HTML 报告
			
 
				+"""
			
 
				+
			
 
				+import sqlite3
			
 
				+import os
			
 
				+import shutil
			
 
				+import pytz
			
 
				+import re
			
 
				+from datetime import datetime, timedelta
			
 
				+from pathlib import Path
			
 
				+from typing import Dict, List, Optional, Any
			
 
				+
			
 
				+from trendradar.storage.base import StorageBackend, NewsItem, NewsData
			
 
				+from trendradar.utils.time import (
			
 
				+    get_configured_time,
			
 
				+    format_date_folder,
			
 
				+    format_time_filename,
			
 
				+)
			
 
				+
			
 
				+
			
 
				+class LocalStorageBackend(StorageBackend):
			
 
				+    """
			
 
				+    本地存储后端
			
 
				+
			
 
				+    使用 SQLite 数据库存储新闻数据，支持：
			
 
				+    - 按日期组织的 SQLite 数据库文件
			
 
				+    - 可选的 TXT 快照（用于调试）
			
 
				+    - HTML 报告生成
			
 
				+    """
			
 
				+
			
 
    def __init__(
        self,
        data_dir: str = "output",
        enable_txt: bool = True,
        enable_html: bool = True,
        timezone: str = "Asia/Shanghai",
    ):
        """
        Set up the local storage backend.

        Args:
            data_dir: Path of the data directory.
            enable_txt: Whether TXT snapshots are enabled.
            enable_html: Whether HTML reports are enabled.
            timezone: Timezone name (default Asia/Shanghai).
        """
        self.data_dir = Path(data_dir)

        # Feature switches and timezone are stored as given.
        self.enable_txt, self.enable_html = enable_txt, enable_html
        self.timezone = timezone

        # Open SQLite connections, keyed by database file path.
        self._db_connections: Dict[str, sqlite3.Connection] = {}
			
 
				+
			
 
    @property
    def backend_name(self) -> str:
        """Name of this backend: always the literal ``"local"``."""
        return "local"
			
 
				+
			
 
    @property
    def supports_txt(self) -> bool:
        """Whether TXT snapshots are enabled; mirrors the ``enable_txt`` setting."""
        txt_enabled = self.enable_txt
        return txt_enabled
			
 
				+
			
 
    def _get_configured_time(self) -> datetime:
        """Return the current time in this backend's configured timezone."""
        tz_name = self.timezone
        return get_configured_time(tz_name)
			
 
				+
			
 
    def _format_date_folder(self, date: Optional[str] = None) -> str:
        """
        Format the date folder name (ISO format: YYYY-MM-DD).

        Delegates to ``format_date_folder`` with this backend's timezone.
        """
        tz_name = self.timezone
        return format_date_folder(date, tz_name)
			
 
				+
			
 
    def _format_time_filename(self) -> str:
        """
        Format the time-based file name (format: HH-MM).

        Delegates to ``format_time_filename`` with this backend's timezone.
        """
        tz_name = self.timezone
        return format_time_filename(tz_name)
			
 
				+
			
 
    def _get_db_path(self, date: Optional[str] = None) -> Path:
        """
        Return the path of the SQLite database file for ``date``.

        The file lives at ``<data_dir>/<date folder>/news.db``. The date
        folder is created (with parents) on every call if it does not exist.
        """
        folder_name = self._format_date_folder(date)
        day_dir = self.data_dir.joinpath(folder_name)
        day_dir.mkdir(parents=True, exist_ok=True)
        return day_dir.joinpath("news.db")
			
 
				+
			
 
    def _get_connection(self, date: Optional[str] = None) -> sqlite3.Connection:
        """
        Return the database connection for ``date``, opening it on first use.

        Connections are cached by database file path. A newly opened
        connection gets ``sqlite3.Row`` as its row factory and has its tables
        initialised before it is cached.

        Args:
            date: Date string passed through to ``_get_db_path``.

        Returns:
            The cached or newly opened connection.

        Raises:
            Whatever ``_init_tables`` or ``sqlite3.connect`` raises. If table
            initialisation fails, the new connection is closed before the
            exception propagates, and nothing is cached.
        """
        db_path = str(self._get_db_path(date))

        cached = self._db_connections.get(db_path)
        if cached is not None:
            return cached

        conn = sqlite3.connect(db_path)
        try:
            conn.row_factory = sqlite3.Row
            self._init_tables(conn)
        except Exception:
            # Do not leak the handle when the schema cannot be applied.
            conn.close()
            raise

        self._db_connections[db_path] = conn
        return conn
			
 
				+
			
 
    def _get_schema_path(self) -> Path:
        """Return the path of ``schema.sql``, located next to this module file."""
        module_dir = Path(__file__).parent
        return module_dir.joinpath("schema.sql")
			
 
				+
			
 
    def _init_tables(self, conn: sqlite3.Connection) -> None:
        """
        Initialise the database tables from ``schema.sql``.

        Args:
            conn: The connection on which the schema script is executed.

        Raises:
            FileNotFoundError: If the schema file does not exist.
        """
        schema_path = self._get_schema_path()

        # Guard clause: without the schema file there is nothing to apply.
        if not schema_path.exists():
            raise FileNotFoundError(f"Schema file not found: {schema_path}")

        ddl_script = schema_path.read_text(encoding="utf-8")
        conn.executescript(ddl_script)

        conn.commit()
			
 
				+
			
 
    def save_news_data(self, data: NewsData) -> bool:
        """
        Save news data to SQLite, using the URL as the unique key and
        detecting title changes.

        For every item that has a URL, the (url, platform_id) pair is looked
        up in ``news_items``. A hit appends a ``rank_history`` row, records a
        ``title_changes`` row when the title differs, and updates the stored
        item. A miss inserts a new item together with its initial rank. Items
        without a URL are always inserted (no de-duplication). Finally a
        ``crawl_records`` row and per-source ``crawl_source_status`` rows are
        written and the transaction is committed.

        A ``sqlite3.Error`` raised for a single item is printed and processing
        continues with the next item (statements already executed for that
        item stay in the transaction). Any other failure rolls back the
        uncommitted work so the cached connection does not carry a
        half-written crawl into a later commit.

        Args:
            data: The news data to save.

        Returns:
            True if the data was committed, False otherwise.
        """
        conn: Optional[sqlite3.Connection] = None
        try:
            conn = self._get_connection(data.date)
            cursor = conn.cursor()

            # Current time in the configured timezone.
            now_str = self._get_configured_time().strftime("%Y-%m-%d %H:%M:%S")

            # Sync platform info into the platforms table first.
            for source_id, source_name in data.id_to_name.items():
                cursor.execute("""
                    INSERT INTO platforms (id, name, updated_at)
                    VALUES (?, ?, ?)
                    ON CONFLICT(id) DO UPDATE SET
                        name = excluded.name,
                        updated_at = excluded.updated_at
                """, (source_id, source_name, now_str))

            # Counters for the summary log line.
            new_count = 0
            updated_count = 0
            title_changed_count = 0
            success_sources = []

            for source_id, news_list in data.items.items():
                success_sources.append(source_id)

                for item in news_list:
                    try:
                        # Look for an existing row by URL + platform_id.
                        # Items without a URL are never matched.
                        existing = None
                        if item.url:
                            cursor.execute("""
                                SELECT id, title FROM news_items
                                WHERE url = ? AND platform_id = ?
                            """, (item.url, source_id))
                            existing = cursor.fetchone()

                        if existing is None:
                            self._insert_new_item_with_rank(
                                cursor, item, source_id, data.crawl_time, now_str
                            )
                            new_count += 1
                            continue

                        existing_id, existing_title = existing

                        # Record a title change when the headline differs.
                        if existing_title != item.title:
                            cursor.execute("""
                                INSERT INTO title_changes
                                (news_item_id, old_title, new_title, changed_at)
                                VALUES (?, ?, ?, ?)
                            """, (existing_id, existing_title, item.title, now_str))
                            title_changed_count += 1

                        # Append to the rank history.
                        cursor.execute("""
                            INSERT INTO rank_history
                            (news_item_id, rank, crawl_time, created_at)
                            VALUES (?, ?, ?, ?)
                        """, (existing_id, item.rank, data.crawl_time, now_str))

                        # Update the stored item.
                        cursor.execute("""
                            UPDATE news_items SET
                                title = ?,
                                rank = ?,
                                mobile_url = ?,
                                last_crawl_time = ?,
                                crawl_count = crawl_count + 1,
                                updated_at = ?
                            WHERE id = ?
                        """, (item.title, item.rank, item.mobile_url,
                              data.crawl_time, now_str, existing_id))
                        updated_count += 1

                    except sqlite3.Error as e:
                        print(f"保存新闻条目失败 [{item.title[:30]}...]: {e}")

            total_items = new_count + updated_count

            # Record this crawl.
            cursor.execute("""
                INSERT OR REPLACE INTO crawl_records
                (crawl_time, total_items, created_at)
                VALUES (?, ?, ?)
            """, (data.crawl_time, total_items, now_str))

            # Fetch the ID of the crawl record just written.
            cursor.execute("""
                SELECT id FROM crawl_records WHERE crawl_time = ?
            """, (data.crawl_time,))
            record_row = cursor.fetchone()
            if record_row:
                crawl_record_id = record_row[0]

                # Sources that succeeded.
                for source_id in success_sources:
                    cursor.execute("""
                        INSERT OR REPLACE INTO crawl_source_status
                        (crawl_record_id, platform_id, status)
                        VALUES (?, ?, 'success')
                    """, (crawl_record_id, source_id))

                # Sources that failed.
                for failed_id in data.failed_ids:
                    # Make sure the failed platform also exists in platforms.
                    cursor.execute("""
                        INSERT OR IGNORE INTO platforms (id, name, updated_at)
                        VALUES (?, ?, ?)
                    """, (failed_id, failed_id, now_str))

                    cursor.execute("""
                        INSERT OR REPLACE INTO crawl_source_status
                        (crawl_record_id, platform_id, status)
                        VALUES (?, ?, 'failed')
                    """, (crawl_record_id, failed_id))

            conn.commit()

            # Print a summary of what was stored.
            log_parts = [f"[本地存储] 处理完成：新增 {new_count} 条"]
            if updated_count > 0:
                log_parts.append(f"更新 {updated_count} 条")
            if title_changed_count > 0:
                log_parts.append(f"标题变更 {title_changed_count} 条")
            print("，".join(log_parts))

            return True

        except Exception as e:
            # The connection is cached and reused, so discard the partial
            # transaction; otherwise the next successful commit would persist
            # this half-written crawl.
            if conn is not None:
                try:
                    conn.rollback()
                except sqlite3.Error as rollback_error:
                    print(f"[本地存储] 回滚失败: {rollback_error}")
            print(f"[本地存储] 保存失败: {e}")
            return False

    def _insert_new_item_with_rank(
        self,
        cursor: sqlite3.Cursor,
        item: NewsItem,
        source_id: str,
        crawl_time: str,
        now_str: str,
    ) -> None:
        """Insert ``item`` as a new ``news_items`` row plus its initial ``rank_history`` row."""
        cursor.execute("""
            INSERT INTO news_items
            (title, platform_id, rank, url, mobile_url,
             first_crawl_time, last_crawl_time, crawl_count,
             created_at, updated_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, 1, ?, ?)
        """, (item.title, source_id, item.rank, item.url,
              item.mobile_url, crawl_time, crawl_time,
              now_str, now_str))
        new_id = cursor.lastrowid

        # Record the initial rank for the freshly inserted item.
        cursor.execute("""
            INSERT INTO rank_history
            (news_item_id, rank, crawl_time, created_at)
            VALUES (?, ?, ?, ?)
        """, (new_id, item.rank, crawl_time, now_str))
			
 
				+
			
 
				+    def get_today_all_data(self, date: Optional[str] = None) -> Optional[NewsData]:
			
 
				+        """
			
 
				+        获取指定日期的所有新闻数据（合并后）
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期字符串，默认为今天
			
 
				+
			
 
				+        Returns:
			
 
				+            合并后的新闻数据
			
 
				+        """
			
 
				+        try:
			
 
				+            db_path = self._get_db_path(date)
			
 
				+            if not db_path.exists():
			
 
				+                return None
			
 
				+
			
 
				+            conn = self._get_connection(date)
			
 
				+            cursor = conn.cursor()
			
 
				+
			
 
				+            # 获取所有新闻数据（包含 id 用于查询排名历史）
			
 
				+            cursor.execute("""
			
 
				+                SELECT n.id, n.title, n.platform_id, p.name as platform_name,
			
 
				+                       n.rank, n.url, n.mobile_url,
			
 
				+                       n.first_crawl_time, n.last_crawl_time, n.crawl_count
			
 
				+                FROM news_items n
			
 
				+                LEFT JOIN platforms p ON n.platform_id = p.id
			
 
				+                ORDER BY n.platform_id, n.last_crawl_time
			
 
				+            """)
			
 
				+
			
 
				+            rows = cursor.fetchall()
			
 
				+            if not rows:
			
 
				+                return None
			
 
				+
			
 
				+            # 收集所有 news_item_id
			
 
				+            news_ids = [row[0] for row in rows]
			
 
				+
			
 
				+            # 批量查询排名历史
			
 
				+            rank_history_map: Dict[int, List[int]] = {}
			
 
				+            if news_ids:
			
 
				+                placeholders = ",".join("?" * len(news_ids))
			
 
				+                cursor.execute(f"""
			
 
				+                    SELECT news_item_id, rank FROM rank_history
			
 
				+                    WHERE news_item_id IN ({placeholders})
			
 
				+                    ORDER BY news_item_id, crawl_time
			
 
				+                """, news_ids)
			
 
				+                for rh_row in cursor.fetchall():
			
 
				+                    news_id, rank = rh_row[0], rh_row[1]
			
 
				+                    if news_id not in rank_history_map:
			
 
				+                        rank_history_map[news_id] = []
			
 
				+                    if rank not in rank_history_map[news_id]:
			
 
				+                        rank_history_map[news_id].append(rank)
			
 
				+
			
 
				+            # 按 platform_id 分组
			
 
				+            items: Dict[str, List[NewsItem]] = {}
			
 
				+            id_to_name: Dict[str, str] = {}
			
 
				+            crawl_date = self._format_date_folder(date)
			
 
				+
			
 
				+            for row in rows:
			
 
				+                news_id = row[0]
			
 
				+                platform_id = row[2]
			
 
				+                title = row[1]
			
 
				+                platform_name = row[3] or platform_id
			
 
				+
			
 
				+                id_to_name[platform_id] = platform_name
			
 
				+
			
 
				+                if platform_id not in items:
			
 
				+                    items[platform_id] = []
			
 
				+
			
 
				+                # 获取排名历史，如果没有则使用当前排名
			
 
				+                ranks = rank_history_map.get(news_id, [row[4]])
			
 
				+
			
 
				+                items[platform_id].append(NewsItem(
			
 
				+                    title=title,
			
 
				+                    source_id=platform_id,
			
 
				+                    source_name=platform_name,
			
 
				+                    rank=row[4],
			
 
				+                    url=row[5] or "",
			
 
				+                    mobile_url=row[6] or "",
			
 
				+                    crawl_time=row[8],  # last_crawl_time
			
 
				+                    ranks=ranks,
			
 
				+                    first_time=row[7],  # first_crawl_time
			
 
				+                    last_time=row[8],   # last_crawl_time
			
 
				+                    count=row[9],       # crawl_count
			
 
				+                ))
			
 
				+
			
 
				+            final_items = items
			
 
				+
			
 
				+            # 获取失败的来源
			
 
				+            cursor.execute("""
			
 
				+                SELECT DISTINCT css.platform_id
			
 
				+                FROM crawl_source_status css
			
 
				+                JOIN crawl_records cr ON css.crawl_record_id = cr.id
			
 
				+                WHERE css.status = 'failed'
			
 
				+            """)
			
 
				+            failed_ids = [row[0] for row in cursor.fetchall()]
			
 
				+
			
 
				+            # 获取最新的抓取时间
			
 
				+            cursor.execute("""
			
 
				+                SELECT crawl_time FROM crawl_records
			
 
				+                ORDER BY crawl_time DESC
			
 
				+                LIMIT 1
			
 
				+            """)
			
 
				+
			
 
				+            time_row = cursor.fetchone()
			
 
				+            crawl_time = time_row[0] if time_row else self._format_time_filename()
			
 
				+
			
 
				+            return NewsData(
			
 
				+                date=crawl_date,
			
 
				+                crawl_time=crawl_time,
			
 
				+                items=final_items,
			
 
				+                id_to_name=id_to_name,
			
 
				+                failed_ids=failed_ids,
			
 
				+            )
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[本地存储] 读取数据失败: {e}")
			
 
				+            return None
			
 
				+
			
 
				+    def get_latest_crawl_data(self, date: Optional[str] = None) -> Optional[NewsData]:
			
 
				+        """
			
 
				+        获取最新一次抓取的数据
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期字符串，默认为今天
			
 
				+
			
 
				+        Returns:
			
 
				+            最新抓取的新闻数据
			
 
				+        """
			
 
				+        try:
			
 
				+            db_path = self._get_db_path(date)
			
 
				+            if not db_path.exists():
			
 
				+                return None
			
 
				+
			
 
				+            conn = self._get_connection(date)
			
 
				+            cursor = conn.cursor()
			
 
				+
			
 
				+            # 获取最新的抓取时间
			
 
				+            cursor.execute("""
			
 
				+                SELECT crawl_time FROM crawl_records
			
 
				+                ORDER BY crawl_time DESC
			
 
				+                LIMIT 1
			
 
				+            """)
			
 
				+
			
 
				+            time_row = cursor.fetchone()
			
 
				+            if not time_row:
			
 
				+                return None
			
 
				+
			
 
				+            latest_time = time_row[0]
			
 
				+
			
 
				+            # 获取该时间的新闻数据（包含 id 用于查询排名历史）
			
 
				+            cursor.execute("""
			
 
				+                SELECT n.id, n.title, n.platform_id, p.name as platform_name,
			
 
				+                       n.rank, n.url, n.mobile_url,
			
 
				+                       n.first_crawl_time, n.last_crawl_time, n.crawl_count
			
 
				+                FROM news_items n
			
 
				+                LEFT JOIN platforms p ON n.platform_id = p.id
			
 
				+                WHERE n.last_crawl_time = ?
			
 
				+            """, (latest_time,))
			
 
				+
			
 
				+            rows = cursor.fetchall()
			
 
				+            if not rows:
			
 
				+                return None
			
 
				+
			
 
				+            # 收集所有 news_item_id
			
 
				+            news_ids = [row[0] for row in rows]
			
 
				+
			
 
				+            # 批量查询排名历史
			
 
				+            rank_history_map: Dict[int, List[int]] = {}
			
 
				+            if news_ids:
			
 
				+                placeholders = ",".join("?" * len(news_ids))
			
 
				+                cursor.execute(f"""
			
 
				+                    SELECT news_item_id, rank FROM rank_history
			
 
				+                    WHERE news_item_id IN ({placeholders})
			
 
				+                    ORDER BY news_item_id, crawl_time
			
 
				+                """, news_ids)
			
 
				+                for rh_row in cursor.fetchall():
			
 
				+                    news_id, rank = rh_row[0], rh_row[1]
			
 
				+                    if news_id not in rank_history_map:
			
 
				+                        rank_history_map[news_id] = []
			
 
				+                    if rank not in rank_history_map[news_id]:
			
 
				+                        rank_history_map[news_id].append(rank)
			
 
				+
			
 
				+            items: Dict[str, List[NewsItem]] = {}
			
 
				+            id_to_name: Dict[str, str] = {}
			
 
				+            crawl_date = self._format_date_folder(date)
			
 
				+
			
 
				+            for row in rows:
			
 
				+                news_id = row[0]
			
 
				+                platform_id = row[2]
			
 
				+                platform_name = row[3] or platform_id
			
 
				+                id_to_name[platform_id] = platform_name
			
 
				+
			
 
				+                if platform_id not in items:
			
 
				+                    items[platform_id] = []
			
 
				+
			
 
				+                # 获取排名历史，如果没有则使用当前排名
			
 
				+                ranks = rank_history_map.get(news_id, [row[4]])
			
 
				+
			
 
				+                items[platform_id].append(NewsItem(
			
 
				+                    title=row[1],
			
 
				+                    source_id=platform_id,
			
 
				+                    source_name=platform_name,
			
 
				+                    rank=row[4],
			
 
				+                    url=row[5] or "",
			
 
				+                    mobile_url=row[6] or "",
			
 
				+                    crawl_time=row[8],  # last_crawl_time
			
 
				+                    ranks=ranks,
			
 
				+                    first_time=row[7],  # first_crawl_time
			
 
				+                    last_time=row[8],   # last_crawl_time
			
 
				+                    count=row[9],       # crawl_count
			
 
				+                ))
			
 
				+
			
 
				+            # 获取失败的来源（针对最新一次抓取）
			
 
				+            cursor.execute("""
			
 
				+                SELECT css.platform_id
			
 
				+                FROM crawl_source_status css
			
 
				+                JOIN crawl_records cr ON css.crawl_record_id = cr.id
			
 
				+                WHERE cr.crawl_time = ? AND css.status = 'failed'
			
 
				+            """, (latest_time,))
			
 
				+
			
 
				+            failed_ids = [row[0] for row in cursor.fetchall()]
			
 
				+
			
 
				+            return NewsData(
			
 
				+                date=crawl_date,
			
 
				+                crawl_time=latest_time,
			
 
				+                items=items,
			
 
				+                id_to_name=id_to_name,
			
 
				+                failed_ids=failed_ids,
			
 
				+            )
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[本地存储] 获取最新数据失败: {e}")
			
 
				+            return None
			
 
				+
			
 
				+    def detect_new_titles(self, current_data: NewsData) -> Dict[str, Dict]:
			
 
				+        """
			
 
				+        检测新增的标题
			
 
				+
			
 
				+        Args:
			
 
				+            current_data: 当前抓取的数据
			
 
				+
			
 
				+        Returns:
			
 
				+            新增的标题数据 {source_id: {title: NewsItem}}
			
 
				+        """
			
 
				+        try:
			
 
				+            # 获取历史数据
			
 
				+            historical_data = self.get_today_all_data(current_data.date)
			
 
				+
			
 
				+            if not historical_data:
			
 
				+                # 没有历史数据，所有都是新的
			
 
				+                new_titles = {}
			
 
				+                for source_id, news_list in current_data.items.items():
			
 
				+                    new_titles[source_id] = {item.title: item for item in news_list}
			
 
				+                return new_titles
			
 
				+
			
 
				+            # 收集历史标题
			
 
				+            historical_titles: Dict[str, set] = {}
			
 
				+            for source_id, news_list in historical_data.items.items():
			
 
				+                historical_titles[source_id] = {item.title for item in news_list}
			
 
				+
			
 
				+            # 检测新增
			
 
				+            new_titles = {}
			
 
				+            for source_id, news_list in current_data.items.items():
			
 
				+                hist_set = historical_titles.get(source_id, set())
			
 
				+                for item in news_list:
			
 
				+                    if item.title not in hist_set:
			
 
				+                        if source_id not in new_titles:
			
 
				+                            new_titles[source_id] = {}
			
 
				+                        new_titles[source_id][item.title] = item
			
 
				+
			
 
				+            return new_titles
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[本地存储] 检测新标题失败: {e}")
			
 
				+            return {}
			
 
				+
			
 
				+    def save_txt_snapshot(self, data: NewsData) -> Optional[str]:
			
 
				+        """
			
 
				+        保存 TXT 快照
			
 
				+
			
 
				+        Args:
			
 
				+            data: 新闻数据
			
 
				+
			
 
				+        Returns:
			
 
				+            保存的文件路径
			
 
				+        """
			
 
				+        if not self.enable_txt:
			
 
				+            return None
			
 
				+
			
 
				+        try:
			
 
				+            date_folder = self._format_date_folder(data.date)
			
 
				+            txt_dir = self.data_dir / date_folder / "txt"
			
 
				+            txt_dir.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+            file_path = txt_dir / f"{data.crawl_time}.txt"
			
 
				+
			
 
				+            with open(file_path, "w", encoding="utf-8") as f:
			
 
				+                for source_id, news_list in data.items.items():
			
 
				+                    source_name = data.id_to_name.get(source_id, source_id)
			
 
				+
			
 
				+                    # 写入来源标题
			
 
				+                    if source_name and source_name != source_id:
			
 
				+                        f.write(f"{source_id} | {source_name}\n")
			
 
				+                    else:
			
 
				+                        f.write(f"{source_id}\n")
			
 
				+
			
 
				+                    # 按排名排序
			
 
				+                    sorted_news = sorted(news_list, key=lambda x: x.rank)
			
 
				+
			
 
				+                    for item in sorted_news:
			
 
				+                        line = f"{item.rank}. {item.title}"
			
 
				+                        if item.url:
			
 
				+                            line += f" [URL:{item.url}]"
			
 
				+                        if item.mobile_url:
			
 
				+                            line += f" [MOBILE:{item.mobile_url}]"
			
 
				+                        f.write(line + "\n")
			
 
				+
			
 
				+                    f.write("\n")
			
 
				+
			
 
				+                # 写入失败的来源
			
 
				+                if data.failed_ids:
			
 
				+                    f.write("==== 以下ID请求失败 ====\n")
			
 
				+                    for failed_id in data.failed_ids:
			
 
				+                        f.write(f"{failed_id}\n")
			
 
				+
			
 
				+            print(f"[本地存储] TXT 快照已保存: {file_path}")
			
 
				+            return str(file_path)
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[本地存储] 保存 TXT 快照失败: {e}")
			
 
				+            return None
			
 
				+
			
 
				+    def save_html_report(self, html_content: str, filename: str, is_summary: bool = False) -> Optional[str]:
			
 
				+        """
			
 
				+        保存 HTML 报告
			
 
				+
			
 
				+        Args:
			
 
				+            html_content: HTML 内容
			
 
				+            filename: 文件名
			
 
				+            is_summary: 是否为汇总报告
			
 
				+
			
 
				+        Returns:
			
 
				+            保存的文件路径
			
 
				+        """
			
 
				+        if not self.enable_html:
			
 
				+            return None
			
 
				+
			
 
				+        try:
			
 
				+            date_folder = self._format_date_folder()
			
 
				+            html_dir = self.data_dir / date_folder / "html"
			
 
				+            html_dir.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+            file_path = html_dir / filename
			
 
				+
			
 
				+            with open(file_path, "w", encoding="utf-8") as f:
			
 
				+                f.write(html_content)
			
 
				+
			
 
				+            print(f"[本地存储] HTML 报告已保存: {file_path}")
			
 
				+            return str(file_path)
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[本地存储] 保存 HTML 报告失败: {e}")
			
 
				+            return None
			
 
				+
			
 
				+    def is_first_crawl_today(self, date: Optional[str] = None) -> bool:
			
 
				+        """
			
 
				+        检查是否是当天第一次抓取
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期字符串，默认为今天
			
 
				+
			
 
				+        Returns:
			
 
				+            是否是第一次抓取
			
 
				+        """
			
 
				+        try:
			
 
				+            db_path = self._get_db_path(date)
			
 
				+            if not db_path.exists():
			
 
				+                return True
			
 
				+
			
 
				+            conn = self._get_connection(date)
			
 
				+            cursor = conn.cursor()
			
 
				+
			
 
				+            cursor.execute("""
			
 
				+                SELECT COUNT(*) as count FROM crawl_records
			
 
				+            """)
			
 
				+
			
 
				+            row = cursor.fetchone()
			
 
				+            count = row[0] if row else 0
			
 
				+
			
 
				+            # 如果只有一条或没有记录，视为第一次抓取
			
 
				+            return count <= 1
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[本地存储] 检查首次抓取失败: {e}")
			
 
				+            return True
			
 
				+
			
 
				+    def get_crawl_times(self, date: Optional[str] = None) -> List[str]:
			
 
				+        """
			
 
				+        获取指定日期的所有抓取时间列表
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期字符串，默认为今天
			
 
				+
			
 
				+        Returns:
			
 
				+            抓取时间列表（按时间排序）
			
 
				+        """
			
 
				+        try:
			
 
				+            db_path = self._get_db_path(date)
			
 
				+            if not db_path.exists():
			
 
				+                return []
			
 
				+
			
 
				+            conn = self._get_connection(date)
			
 
				+            cursor = conn.cursor()
			
 
				+
			
 
				+            cursor.execute("""
			
 
				+                SELECT crawl_time FROM crawl_records
			
 
				+                ORDER BY crawl_time
			
 
				+            """)
			
 
				+
			
 
				+            rows = cursor.fetchall()
			
 
				+            return [row[0] for row in rows]
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[本地存储] 获取抓取时间列表失败: {e}")
			
 
				+            return []
			
 
				+
			
 
				+    def cleanup(self) -> None:
			
 
				+        """清理资源（关闭数据库连接）"""
			
 
				+        for db_path, conn in self._db_connections.items():
			
 
				+            try:
			
 
				+                conn.close()
			
 
				+                print(f"[本地存储] 关闭数据库连接: {db_path}")
			
 
				+            except Exception as e:
			
 
				+                print(f"[本地存储] 关闭连接失败 {db_path}: {e}")
			
 
				+
			
 
				+        self._db_connections.clear()
			
 
				+
			
 
				+    def cleanup_old_data(self, retention_days: int) -> int:
			
 
				+        """
			
 
				+        清理过期数据
			
 
				+
			
 
				+        Args:
			
 
				+            retention_days: 保留天数（0 表示不清理）
			
 
				+
			
 
				+        Returns:
			
 
				+            删除的日期目录数量
			
 
				+        """
			
 
				+        if retention_days <= 0:
			
 
				+            return 0
			
 
				+
			
 
				+        deleted_count = 0
			
 
				+        cutoff_date = self._get_configured_time() - timedelta(days=retention_days)
			
 
				+
			
 
				+        try:
			
 
				+            if not self.data_dir.exists():
			
 
				+                return 0
			
 
				+
			
 
				+            for date_folder in self.data_dir.iterdir():
			
 
				+                if not date_folder.is_dir() or date_folder.name.startswith('.'):
			
 
				+                    continue
			
 
				+
			
 
				+                # 解析日期文件夹名（支持两种格式）
			
 
				+                folder_date = None
			
 
				+                try:
			
 
				+                    # ISO 格式: YYYY-MM-DD
			
 
				+                    date_match = re.match(r'(\d{4})-(\d{2})-(\d{2})', date_folder.name)
			
 
				+                    if date_match:
			
 
				+                        folder_date = datetime(
			
 
				+                            int(date_match.group(1)),
			
 
				+                            int(date_match.group(2)),
			
 
				+                            int(date_match.group(3)),
			
 
				+                            tzinfo=pytz.timezone("Asia/Shanghai")
			
 
				+                        )
			
 
				+                    else:
			
 
				+                        # 旧中文格式: YYYY年MM月DD日
			
 
				+                        date_match = re.match(r'(\d{4})年(\d{2})月(\d{2})日', date_folder.name)
			
 
				+                        if date_match:
			
 
				+                            folder_date = datetime(
			
 
				+                                int(date_match.group(1)),
			
 
				+                                int(date_match.group(2)),
			
 
				+                                int(date_match.group(3)),
			
 
				+                                tzinfo=pytz.timezone("Asia/Shanghai")
			
 
				+                            )
			
 
				+                except Exception:
			
 
				+                    continue
			
 
				+
			
 
				+                if folder_date and folder_date < cutoff_date:
			
 
				+                    # 先关闭该日期的数据库连接
			
 
				+                    db_path = str(self._get_db_path(date_folder.name))
			
 
				+                    if db_path in self._db_connections:
			
 
				+                        try:
			
 
				+                            self._db_connections[db_path].close()
			
 
				+                            del self._db_connections[db_path]
			
 
				+                        except Exception:
			
 
				+                            pass
			
 
				+
			
 
				+                    # 删除整个日期目录
			
 
				+                    try:
			
 
				+                        shutil.rmtree(date_folder)
			
 
				+                        deleted_count += 1
			
 
				+                        print(f"[本地存储] 清理过期数据: {date_folder.name}")
			
 
				+                    except Exception as e:
			
 
				+                        print(f"[本地存储] 删除目录失败 {date_folder.name}: {e}")
			
 
				+
			
 
				+            if deleted_count > 0:
			
 
				+                print(f"[本地存储] 共清理 {deleted_count} 个过期日期目录")
			
 
				+
			
 
				+            return deleted_count
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[本地存储] 清理过期数据失败: {e}")
			
 
				+            return deleted_count
			
 
				+
			
 
				+    def has_pushed_today(self, date: Optional[str] = None) -> bool:
			
 
				+        """
			
 
				+        检查指定日期是否已推送过
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期字符串（YYYY-MM-DD），默认为今天
			
 
				+
			
 
				+        Returns:
			
 
				+            是否已推送
			
 
				+        """
			
 
				+        try:
			
 
				+            conn = self._get_connection(date)
			
 
				+            cursor = conn.cursor()
			
 
				+
			
 
				+            target_date = self._format_date_folder(date)
			
 
				+
			
 
				+            cursor.execute("""
			
 
				+                SELECT pushed FROM push_records WHERE date = ?
			
 
				+            """, (target_date,))
			
 
				+
			
 
				+            row = cursor.fetchone()
			
 
				+            if row:
			
 
				+                return bool(row[0])
			
 
				+            return False
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[本地存储] 检查推送记录失败: {e}")
			
 
				+            return False
			
 
				+
			
 
				+    def record_push(self, report_type: str, date: Optional[str] = None) -> bool:
			
 
				+        """
			
 
				+        记录推送
			
 
				+
			
 
				+        Args:
			
 
				+            report_type: 报告类型
			
 
				+            date: 日期字符串（YYYY-MM-DD），默认为今天
			
 
				+
			
 
				+        Returns:
			
 
				+            是否记录成功
			
 
				+        """
			
 
				+        try:
			
 
				+            conn = self._get_connection(date)
			
 
				+            cursor = conn.cursor()
			
 
				+
			
 
				+            target_date = self._format_date_folder(date)
			
 
				+            now_str = self._get_configured_time().strftime("%Y-%m-%d %H:%M:%S")
			
 
				+
			
 
				+            cursor.execute("""
			
 
				+                INSERT INTO push_records (date, pushed, push_time, report_type, created_at)
			
 
				+                VALUES (?, 1, ?, ?, ?)
			
 
				+                ON CONFLICT(date) DO UPDATE SET
			
 
				+                    pushed = 1,
			
 
				+                    push_time = excluded.push_time,
			
 
				+                    report_type = excluded.report_type
			
 
				+            """, (target_date, now_str, report_type, now_str))
			
 
				+
			
 
				+            conn.commit()
			
 
				+
			
 
				+            print(f"[本地存储] 推送记录已保存: {report_type} at {now_str}")
			
 
				+            return True
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[本地存储] 记录推送失败: {e}")
			
 
				+            return False
			
 
				+
			
 
				+    def __del__(self):
			
 
				+        """析构函数，确保关闭连接"""
			
 
				+        self.cleanup()
			
--- a/trendradar/storage/manager.py
+++ b/trendradar/storage/manager.py
@@ -0,0 +1,316 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+存储管理器 - 统一管理存储后端
			
 
				+
			
 
				+根据环境和配置自动选择合适的存储后端
			
 
				+"""
			
 
				+
			
 
				+import os
			
 
				+from typing import Optional
			
 
				+
			
 
				+from trendradar.storage.base import StorageBackend, NewsData
			
 
				+
			
 
				+
			
 
				+# Module-level storage manager singleton; starts out as None.
				+_storage_manager: Optional["StorageManager"] = None
			
 
				+
			
 
				+
			
 
				+class StorageManager:
				+    """
				+    Storage manager.
				+
				+    Features:
				+    - Detects the runtime environment (GitHub Actions / Docker / local)
				+    - Picks the storage backend from configuration (local / remote / auto)
				+    - Exposes one unified storage interface
				+    - Supports pulling data from remote storage to local disk
				+    """
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        backend_type: str = "auto",
			
 
				+        data_dir: str = "output",
			
 
				+        enable_txt: bool = True,
			
 
				+        enable_html: bool = True,
			
 
				+        remote_config: Optional[dict] = None,
			
 
				+        local_retention_days: int = 0,
			
 
				+        remote_retention_days: int = 0,
			
 
				+        pull_enabled: bool = False,
			
 
				+        pull_days: int = 0,
			
 
				+        timezone: str = "Asia/Shanghai",
			
 
				+    ):
			
 
				+        """
			
 
				+        初始化存储管理器
			
 
				+
			
 
				+        Args:
			
 
				+            backend_type: 存储后端类型 (local / remote / auto)
			
 
				+            data_dir: 本地数据目录
			
 
				+            enable_txt: 是否启用 TXT 快照
			
 
				+            enable_html: 是否启用 HTML 报告
			
 
				+            remote_config: 远程存储配置（endpoint_url, bucket_name, access_key_id 等）
			
 
				+            local_retention_days: 本地数据保留天数（0 = 无限制）
			
 
				+            remote_retention_days: 远程数据保留天数（0 = 无限制）
			
 
				+            pull_enabled: 是否启用启动时自动拉取
			
 
				+            pull_days: 拉取最近 N 天的数据
			
 
				+            timezone: 时区配置（默认 Asia/Shanghai）
			
 
				+        """
			
 
				+        self.backend_type = backend_type
			
 
				+        self.data_dir = data_dir
			
 
				+        self.enable_txt = enable_txt
			
 
				+        self.enable_html = enable_html
			
 
				+        self.remote_config = remote_config or {}
			
 
				+        self.local_retention_days = local_retention_days
			
 
				+        self.remote_retention_days = remote_retention_days
			
 
				+        self.pull_enabled = pull_enabled
			
 
				+        self.pull_days = pull_days
			
 
				+        self.timezone = timezone
			
 
				+
			
 
				+        self._backend: Optional[StorageBackend] = None
			
 
				+        self._remote_backend: Optional[StorageBackend] = None
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def is_github_actions() -> bool:
			
 
				+        """检测是否在 GitHub Actions 环境中运行"""
			
 
				+        return os.environ.get("GITHUB_ACTIONS") == "true"
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def is_docker() -> bool:
			
 
				+        """检测是否在 Docker 容器中运行"""
			
 
				+        # 方法1: 检查 /.dockerenv 文件
			
 
				+        if os.path.exists("/.dockerenv"):
			
 
				+            return True
			
 
				+
			
 
				+        # 方法2: 检查 cgroup（Linux）
			
 
				+        try:
			
 
				+            with open("/proc/1/cgroup", "r") as f:
			
 
				+                return "docker" in f.read()
			
 
				+        except (FileNotFoundError, PermissionError):
			
 
				+            pass
			
 
				+
			
 
				+        # 方法3: 检查环境变量
			
 
				+        return os.environ.get("DOCKER_CONTAINER") == "true"
			
 
				+
			
 
				+    def _resolve_backend_type(self) -> str:
			
 
				+        """解析实际使用的后端类型"""
			
 
				+        if self.backend_type == "auto":
			
 
				+            if self.is_github_actions():
			
 
				+                # GitHub Actions 环境，检查是否配置了远程存储
			
 
				+                if self._has_remote_config():
			
 
				+                    return "remote"
			
 
				+                else:
			
 
				+                    print("[存储管理器] GitHub Actions 环境但未配置远程存储，使用本地存储")
			
 
				+                    return "local"
			
 
				+            else:
			
 
				+                return "local"
			
 
				+        return self.backend_type
			
 
				+
			
 
				+    def _has_remote_config(self) -> bool:
			
 
				+        """检查是否有有效的远程存储配置"""
			
 
				+        # 检查配置或环境变量
			
 
				+        bucket_name = self.remote_config.get("bucket_name") or os.environ.get("S3_BUCKET_NAME")
			
 
				+        access_key = self.remote_config.get("access_key_id") or os.environ.get("S3_ACCESS_KEY_ID")
			
 
				+        secret_key = self.remote_config.get("secret_access_key") or os.environ.get("S3_SECRET_ACCESS_KEY")
			
 
				+        endpoint = self.remote_config.get("endpoint_url") or os.environ.get("S3_ENDPOINT_URL")
			
 
				+
			
 
				+        # 调试日志
			
 
				+        has_config = bool(bucket_name and access_key and secret_key and endpoint)
			
 
				+        if not has_config:
			
 
				+            print(f"[存储管理器] 远程存储配置检查失败:")
			
 
				+            print(f"  - bucket_name: {'已配置' if bucket_name else '未配置'}")
			
 
				+            print(f"  - access_key_id: {'已配置' if access_key else '未配置'}")
			
 
				+            print(f"  - secret_access_key: {'已配置' if secret_key else '未配置'}")
			
 
				+            print(f"  - endpoint_url: {'已配置' if endpoint else '未配置'}")
			
 
				+
			
 
				+        return has_config
			
 
				+
			
 
				+    def _create_remote_backend(self) -> Optional[StorageBackend]:
			
 
				+        """创建远程存储后端"""
			
 
				+        try:
			
 
				+            from trendradar.storage.remote import RemoteStorageBackend
			
 
				+
			
 
				+            return RemoteStorageBackend(
			
 
				+                bucket_name=self.remote_config.get("bucket_name") or os.environ.get("S3_BUCKET_NAME", ""),
			
 
				+                access_key_id=self.remote_config.get("access_key_id") or os.environ.get("S3_ACCESS_KEY_ID", ""),
			
 
				+                secret_access_key=self.remote_config.get("secret_access_key") or os.environ.get("S3_SECRET_ACCESS_KEY", ""),
			
 
				+                endpoint_url=self.remote_config.get("endpoint_url") or os.environ.get("S3_ENDPOINT_URL", ""),
			
 
				+                region=self.remote_config.get("region") or os.environ.get("S3_REGION", ""),
			
 
				+                enable_txt=self.enable_txt,
			
 
				+                enable_html=self.enable_html,
			
 
				+                timezone=self.timezone,
			
 
				+            )
			
 
				+        except ImportError as e:
			
 
				+            print(f"[存储管理器] 远程后端导入失败: {e}")
			
 
				+            print("[存储管理器] 请确保已安装 boto3: pip install boto3")
			
 
				+            return None
			
 
				+        except Exception as e:
			
 
				+            print(f"[存储管理器] 远程后端初始化失败: {e}")
			
 
				+            return None
			
 
				+
			
 
				+    def get_backend(self) -> StorageBackend:
			
 
				+        """获取存储后端实例"""
			
 
				+        if self._backend is None:
			
 
				+            resolved_type = self._resolve_backend_type()
			
 
				+
			
 
				+            if resolved_type == "remote":
			
 
				+                self._backend = self._create_remote_backend()
			
 
				+                if self._backend:
			
 
				+                    print(f"[存储管理器] 使用远程存储后端")
			
 
				+                else:
			
 
				+                    print("[存储管理器] 回退到本地存储")
			
 
				+                    resolved_type = "local"
			
 
				+
			
 
				+            if resolved_type == "local" or self._backend is None:
			
 
				+                from trendradar.storage.local import LocalStorageBackend
			
 
				+
			
 
				+                self._backend = LocalStorageBackend(
			
 
				+                    data_dir=self.data_dir,
			
 
				+                    enable_txt=self.enable_txt,
			
 
				+                    enable_html=self.enable_html,
			
 
				+                    timezone=self.timezone,
			
 
				+                )
			
 
				+                print(f"[存储管理器] 使用本地存储后端 (数据目录: {self.data_dir})")
			
 
				+
			
 
				+        return self._backend
			
 
				+
			
 
				+    def pull_from_remote(self) -> int:
			
 
				+        """
			
 
				+        从远程拉取数据到本地
			
 
				+
			
 
				+        Returns:
			
 
				+            成功拉取的文件数量
			
 
				+        """
			
 
				+        if not self.pull_enabled or self.pull_days <= 0:
			
 
				+            return 0
			
 
				+
			
 
				+        if not self._has_remote_config():
			
 
				+            print("[存储管理器] 未配置远程存储，无法拉取")
			
 
				+            return 0
			
 
				+
			
 
				+        # 创建远程后端（如果还没有）
			
 
				+        if self._remote_backend is None:
			
 
				+            self._remote_backend = self._create_remote_backend()
			
 
				+
			
 
				+        if self._remote_backend is None:
			
 
				+            print("[存储管理器] 无法创建远程后端，拉取失败")
			
 
				+            return 0
			
 
				+
			
 
				+        # 调用拉取方法
			
 
				+        return self._remote_backend.pull_recent_days(self.pull_days, self.data_dir)
			
 
				+
			
 
				+    def save_news_data(self, data: NewsData) -> bool:
			
 
				+        """保存新闻数据"""
			
 
				+        return self.get_backend().save_news_data(data)
			
 
				+
			
 
				+    def get_today_all_data(self, date: Optional[str] = None) -> Optional[NewsData]:
			
 
				+        """获取当天所有数据"""
			
 
				+        return self.get_backend().get_today_all_data(date)
			
 
				+
			
 
				+    def get_latest_crawl_data(self, date: Optional[str] = None) -> Optional[NewsData]:
			
 
				+        """获取最新抓取数据"""
			
 
				+        return self.get_backend().get_latest_crawl_data(date)
			
 
				+
			
 
				+    def detect_new_titles(self, current_data: NewsData) -> dict:
			
 
				+        """检测新增标题"""
			
 
				+        return self.get_backend().detect_new_titles(current_data)
			
 
				+
			
 
				+    def save_txt_snapshot(self, data: NewsData) -> Optional[str]:
			
 
				+        """保存 TXT 快照"""
			
 
				+        return self.get_backend().save_txt_snapshot(data)
			
 
				+
			
 
				+    def save_html_report(self, html_content: str, filename: str, is_summary: bool = False) -> Optional[str]:
			
 
				+        """保存 HTML 报告"""
			
 
				+        return self.get_backend().save_html_report(html_content, filename, is_summary)
			
 
				+
			
 
				+    def is_first_crawl_today(self, date: Optional[str] = None) -> bool:
			
 
				+        """检查是否是当天第一次抓取"""
			
 
				+        return self.get_backend().is_first_crawl_today(date)
			
 
				+
			
 
				+    def cleanup(self) -> None:
			
 
				+        """清理资源"""
			
 
				+        if self._backend:
			
 
				+            self._backend.cleanup()
			
 
				+        if self._remote_backend:
			
 
				+            self._remote_backend.cleanup()
			
 
				+
			
 
				+    def cleanup_old_data(self) -> int:
			
 
				+        """
			
 
				+        清理过期数据
			
 
				+
			
 
				+        Returns:
			
 
				+            删除的日期目录数量
			
 
				+        """
			
 
				+        total_deleted = 0
			
 
				+
			
 
				+        # 清理本地数据
			
 
				+        if self.local_retention_days > 0:
			
 
				+            total_deleted += self.get_backend().cleanup_old_data(self.local_retention_days)
			
 
				+
			
 
				+        # 清理远程数据（如果配置了）
			
 
				+        if self.remote_retention_days > 0 and self._has_remote_config():
			
 
				+            if self._remote_backend is None:
			
 
				+                self._remote_backend = self._create_remote_backend()
			
 
				+            if self._remote_backend:
			
 
				+                total_deleted += self._remote_backend.cleanup_old_data(self.remote_retention_days)
			
 
				+
			
 
				+        return total_deleted
			
 
				+
			
 
				+    @property
			
 
				+    def backend_name(self) -> str:
			
 
				+        """获取当前后端名称"""
			
 
				+        return self.get_backend().backend_name
			
 
				+
			
 
				+    @property
			
 
				+    def supports_txt(self) -> bool:
			
 
				+        """是否支持 TXT 快照"""
			
 
				+        return self.get_backend().supports_txt
			
 
				+
			
 
				+
			
 
				+def get_storage_manager(
			
 
				+    backend_type: str = "auto",
			
 
				+    data_dir: str = "output",
			
 
				+    enable_txt: bool = True,
			
 
				+    enable_html: bool = True,
			
 
				+    remote_config: Optional[dict] = None,
			
 
				+    local_retention_days: int = 0,
			
 
				+    remote_retention_days: int = 0,
			
 
				+    pull_enabled: bool = False,
			
 
				+    pull_days: int = 0,
			
 
				+    timezone: str = "Asia/Shanghai",
			
 
				+    force_new: bool = False,
			
 
				+) -> StorageManager:
			
 
				+    """
			
 
				+    获取存储管理器单例
			
 
				+
			
 
				+    Args:
			
 
				+        backend_type: 存储后端类型
			
 
				+        data_dir: 本地数据目录
			
 
				+        enable_txt: 是否启用 TXT 快照
			
 
				+        enable_html: 是否启用 HTML 报告
			
 
				+        remote_config: 远程存储配置
			
 
				+        local_retention_days: 本地数据保留天数（0 = 无限制）
			
 
				+        remote_retention_days: 远程数据保留天数（0 = 无限制）
			
 
				+        pull_enabled: 是否启用启动时自动拉取
			
 
				+        pull_days: 拉取最近 N 天的数据
			
 
				+        timezone: 时区配置（默认 Asia/Shanghai）
			
 
				+        force_new: 是否强制创建新实例
			
 
				+
			
 
				+    Returns:
			
 
				+        StorageManager 实例
			
 
				+    """
			
 
				+    global _storage_manager
			
 
				+
			
 
				+    if _storage_manager is None or force_new:
			
 
				+        _storage_manager = StorageManager(
			
 
				+            backend_type=backend_type,
			
 
				+            data_dir=data_dir,
			
 
				+            enable_txt=enable_txt,
			
 
				+            enable_html=enable_html,
			
 
				+            remote_config=remote_config,
			
 
				+            local_retention_days=local_retention_days,
			
 
				+            remote_retention_days=remote_retention_days,
			
 
				+            pull_enabled=pull_enabled,
			
 
				+            pull_days=pull_days,
			
 
				+            timezone=timezone,
			
 
				+        )
			
 
				+
			
 
				+    return _storage_manager
			
--- a/trendradar/storage/remote.py
+++ b/trendradar/storage/remote.py
@@ -0,0 +1,1071 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+远程存储后端（S3 兼容协议）
			
 
				+
			
 
				+支持 Cloudflare R2、阿里云 OSS、腾讯云 COS、AWS S3、MinIO 等
			
 
				+使用 S3 兼容 API (boto3) 访问对象存储
			
 
				+数据流程：下载当天 SQLite → 合并新数据 → 上传回远程
			
 
				+"""
			
 
				+
			
 
				+import atexit
			
 
				+import os
			
 
				+import pytz
			
 
				+import re
			
 
				+import shutil
			
 
				+import sys
			
 
				+import tempfile
			
 
				+import sqlite3
			
 
				+from datetime import datetime, timedelta
			
 
				+from pathlib import Path
			
 
				+from typing import Dict, List, Optional, Any
			
 
				+
			
 
				+try:
			
 
				+    import boto3
			
 
				+    from botocore.exceptions import ClientError
			
 
				+    HAS_BOTO3 = True
			
 
				+except ImportError:
			
 
				+    HAS_BOTO3 = False
			
 
				+    boto3 = None
			
 
				+    ClientError = Exception
			
 
				+
			
 
				+from trendradar.storage.base import StorageBackend, NewsItem, NewsData
			
 
				+from trendradar.utils.time import (
			
 
				+    get_configured_time,
			
 
				+    format_date_folder,
			
 
				+    format_time_filename,
			
 
				+)
			
 
				+
			
 
				+
			
 
				+class RemoteStorageBackend(StorageBackend):
			
 
				+    """
			
 
				+    远程云存储后端（S3 兼容协议）
			
 
				+
			
 
				+    特点：
			
 
				+    - 使用 S3 兼容 API 访问远程存储
			
 
				+    - 支持 Cloudflare R2、阿里云 OSS、腾讯云 COS、AWS S3、MinIO 等
			
 
				+    - 下载 SQLite 到临时目录进行操作
			
 
				+    - 支持数据合并和上传
			
 
				+    - 支持从远程拉取历史数据到本地
			
 
				+    - 运行结束后自动清理临时文件
			
 
				+    """
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        bucket_name: str,
			
 
				+        access_key_id: str,
			
 
				+        secret_access_key: str,
			
 
				+        endpoint_url: str,
			
 
				+        region: str = "",
			
 
				+        enable_txt: bool = False,  # 远程模式默认不生成 TXT
			
 
				+        enable_html: bool = True,
			
 
				+        temp_dir: Optional[str] = None,
			
 
				+        timezone: str = "Asia/Shanghai",
			
 
				+    ):
			
 
				+        """
			
 
				+        初始化远程存储后端
			
 
				+
			
 
				+        Args:
			
 
				+            bucket_name: 存储桶名称
			
 
				+            access_key_id: 访问密钥 ID
			
 
				+            secret_access_key: 访问密钥
			
 
				+            endpoint_url: 服务端点 URL
			
 
				+            region: 区域（可选，部分服务商需要）
			
 
				+            enable_txt: 是否启用 TXT 快照（默认关闭）
			
 
				+            enable_html: 是否启用 HTML 报告
			
 
				+            temp_dir: 临时目录路径（默认使用系统临时目录）
			
 
				+            timezone: 时区配置（默认 Asia/Shanghai）
			
 
				+        """
			
 
				+        if not HAS_BOTO3:
			
 
				+            raise ImportError("远程存储后端需要安装 boto3: pip install boto3")
			
 
				+
			
 
				+        self.bucket_name = bucket_name
			
 
				+        self.endpoint_url = endpoint_url
			
 
				+        self.region = region
			
 
				+        self.enable_txt = enable_txt
			
 
				+        self.enable_html = enable_html
			
 
				+        self.timezone = timezone
			
 
				+
			
 
				+        # 创建临时目录
			
 
				+        self.temp_dir = Path(temp_dir) if temp_dir else Path(tempfile.mkdtemp(prefix="trendradar_"))
			
 
				+        self.temp_dir.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+        # 初始化 S3 客户端
			
 
				+        client_kwargs = {
			
 
				+            "endpoint_url": endpoint_url,
			
 
				+            "aws_access_key_id": access_key_id,
			
 
				+            "aws_secret_access_key": secret_access_key,
			
 
				+        }
			
 
				+        if region:
			
 
				+            client_kwargs["region_name"] = region
			
 
				+
			
 
				+        self.s3_client = boto3.client("s3", **client_kwargs)
			
 
				+
			
 
				+        # 跟踪下载的文件（用于清理）
			
 
				+        self._downloaded_files: List[Path] = []
			
 
				+        self._db_connections: Dict[str, sqlite3.Connection] = {}
			
 
				+
			
 
				+        print(f"[远程存储] 初始化完成，存储桶: {bucket_name}")
			
 
				+
			
 
				+    @property
				+    def backend_name(self) -> str:
				+        """Identifier of this backend; always ``"remote"``."""
				+        return "remote"
			
 
				+
			
 
				+    @property
				+    def supports_txt(self) -> bool:
				+        """Whether TXT snapshots are enabled; mirrors ``self.enable_txt``."""
				+        return self.enable_txt
			
 
				+
			
 
				+    def _get_configured_time(self) -> datetime:
			
 
				+        """获取配置时区的当前时间"""
			
 
				+        return get_configured_time(self.timezone)
			
 
				+
			
 
				+    def _format_date_folder(self, date: Optional[str] = None) -> str:
			
 
				+        """格式化日期文件夹名 (ISO 格式: YYYY-MM-DD)"""
			
 
				+        return format_date_folder(date, self.timezone)
			
 
				+
			
 
				+    def _format_time_filename(self) -> str:
			
 
				+        """格式化时间文件名 (格式: HH-MM)"""
			
 
				+        return format_time_filename(self.timezone)
			
 
				+
			
 
				+    def _get_remote_db_key(self, date: Optional[str] = None) -> str:
			
 
				+        """获取 R2 中 SQLite 文件的对象键"""
			
 
				+        date_folder = self._format_date_folder(date)
			
 
				+        return f"news/{date_folder}.db"
			
 
				+
			
 
				+    def _get_local_db_path(self, date: Optional[str] = None) -> Path:
			
 
				+        """获取本地临时 SQLite 文件路径"""
			
 
				+        date_folder = self._format_date_folder(date)
			
 
				+        return self.temp_dir / date_folder / "news.db"
			
 
				+
			
 
				+    def _check_object_exists(self, r2_key: str) -> bool:
			
 
				+        """
			
 
				+        检查 R2 中对象是否存在
			
 
				+
			
 
				+        Args:
			
 
				+            r2_key: R2 对象键
			
 
				+
			
 
				+        Returns:
			
 
				+            是否存在
			
 
				+        """
			
 
				+        try:
			
 
				+            self.s3_client.head_object(Bucket=self.bucket_name, Key=r2_key)
			
 
				+            return True
			
 
				+        except ClientError as e:
			
 
				+            error_code = e.response.get("Error", {}).get("Code", "")
			
 
				+            # R2/S3 可能返回 404, NoSuchKey, 或其他变体
			
 
				+            if error_code in ("404", "NoSuchKey", "Not Found"):
			
 
				+                return False
			
 
				+            # 其他错误（如权限问题）也视为不存在，但打印警告
			
 
				+            print(f"[远程存储] 检查对象存在性失败 ({r2_key}): {e}")
			
 
				+            return False
			
 
				+        except Exception as e:
			
 
				+            print(f"[远程存储] 检查对象存在性异常 ({r2_key}): {e}")
			
 
				+            return False
			
 
				+
			
 
				+    def _download_sqlite(self, date: Optional[str] = None) -> Optional[Path]:
			
 
				+        """
			
 
				+        从 R2 下载当天的 SQLite 文件到本地临时目录
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期字符串
			
 
				+
			
 
				+        Returns:
			
 
				+            本地文件路径，如果不存在返回 None
			
 
				+        """
			
 
				+        r2_key = self._get_remote_db_key(date)
			
 
				+        local_path = self._get_local_db_path(date)
			
 
				+
			
 
				+        # 确保目录存在
			
 
				+        local_path.parent.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+        # 先检查文件是否存在
			
 
				+        if not self._check_object_exists(r2_key):
			
 
				+            print(f"[远程存储] 文件不存在，将创建新数据库: {r2_key}")
			
 
				+            return None
			
 
				+
			
 
				+        try:
			
 
				+            self.s3_client.download_file(self.bucket_name, r2_key, str(local_path))
			
 
				+            self._downloaded_files.append(local_path)
			
 
				+            print(f"[远程存储] 已下载: {r2_key} -> {local_path}")
			
 
				+            return local_path
			
 
				+        except ClientError as e:
			
 
				+            error_code = e.response.get("Error", {}).get("Code", "")
			
 
				+            # R2/S3 可能返回不同的错误码
			
 
				+            if error_code in ("404", "NoSuchKey", "Not Found"):
			
 
				+                print(f"[远程存储] 文件不存在，将创建新数据库: {r2_key}")
			
 
				+                return None
			
 
				+            else:
			
 
				+                print(f"[远程存储] 下载失败 (错误码: {error_code}): {e}")
			
 
				+                raise
			
 
				+        except Exception as e:
			
 
				+            print(f"[远程存储] 下载异常: {e}")
			
 
				+            raise
			
 
				+
			
 
				+    def _upload_sqlite(self, date: Optional[str] = None) -> bool:
			
 
				+        """
			
 
				+        上传本地 SQLite 文件到 R2
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期字符串
			
 
				+
			
 
				+        Returns:
			
 
				+            是否上传成功
			
 
				+        """
			
 
				+        local_path = self._get_local_db_path(date)
			
 
				+        r2_key = self._get_remote_db_key(date)
			
 
				+
			
 
				+        if not local_path.exists():
			
 
				+            print(f"[远程存储] 本地文件不存在，无法上传: {local_path}")
			
 
				+            return False
			
 
				+
			
 
				+        try:
			
 
				+            # 获取本地文件大小
			
 
				+            local_size = local_path.stat().st_size
			
 
				+            print(f"[远程存储] 准备上传: {local_path} ({local_size} bytes) -> {r2_key}")
			
 
				+
			
 
				+            self.s3_client.upload_file(str(local_path), self.bucket_name, r2_key)
			
 
				+            print(f"[远程存储] 已上传: {local_path} -> {r2_key}")
			
 
				+
			
 
				+            # 验证上传成功
			
 
				+            if self._check_object_exists(r2_key):
			
 
				+                print(f"[远程存储] 上传验证成功: {r2_key}")
			
 
				+                return True
			
 
				+            else:
			
 
				+                print(f"[远程存储] 上传验证失败: 文件未在 R2 中找到")
			
 
				+                return False
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[远程存储] 上传失败: {e}")
			
 
				+            return False
			
 
				+
			
 
				+    def _get_connection(self, date: Optional[str] = None) -> sqlite3.Connection:
			
 
				+        """获取数据库连接"""
			
 
				+        local_path = self._get_local_db_path(date)
			
 
				+        db_path = str(local_path)
			
 
				+
			
 
				+        if db_path not in self._db_connections:
			
 
				+            # 确保目录存在
			
 
				+            local_path.parent.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+            # 如果本地不存在，尝试从 R2 下载
			
 
				+            if not local_path.exists():
			
 
				+                self._download_sqlite(date)
			
 
				+
			
 
				+            conn = sqlite3.connect(db_path)
			
 
				+            conn.row_factory = sqlite3.Row
			
 
				+            self._init_tables(conn)
			
 
				+            self._db_connections[db_path] = conn
			
 
				+
			
 
				+        return self._db_connections[db_path]
			
 
				+
			
 
				+    def _get_schema_path(self) -> Path:
			
 
				+        """获取 schema.sql 文件路径"""
			
 
				+        return Path(__file__).parent / "schema.sql"
			
 
				+
			
 
				+    def _init_tables(self, conn: sqlite3.Connection) -> None:
			
 
				+        """从 schema.sql 初始化数据库表结构"""
			
 
				+        schema_path = self._get_schema_path()
			
 
				+        
			
 
				+        if schema_path.exists():
			
 
				+            with open(schema_path, "r", encoding="utf-8") as f:
			
 
				+                schema_sql = f.read()
			
 
				+            conn.executescript(schema_sql)
			
 
				+        else:
			
 
				+            raise FileNotFoundError(f"Schema file not found: {schema_path}")
			
 
				+
			
 
				+        conn.commit()
			
 
				+
			
 
				+    def save_news_data(self, data: NewsData) -> bool:
			
 
				+        """
			
 
				+        保存新闻数据到 R2（以 URL 为唯一标识，支持标题更新检测）
			
 
				+
			
 
				+        流程：下载现有数据库 → 插入/更新数据 → 上传回 R2
			
 
				+
			
 
				+        Args:
			
 
				+            data: 新闻数据
			
 
				+
			
 
				+        Returns:
			
 
				+            是否保存成功
			
 
				+        """
			
 
				+        try:
			
 
				+            conn = self._get_connection(data.date)
			
 
				+            cursor = conn.cursor()
			
 
				+
			
 
				+            # 查询已有记录数
			
 
				+            cursor.execute("SELECT COUNT(*) as count FROM news_items")
			
 
				+            row = cursor.fetchone()
			
 
				+            existing_count = row[0] if row else 0
			
 
				+            if existing_count > 0:
			
 
				+                print(f"[远程存储] 已有 {existing_count} 条历史记录，将合并新数据")
			
 
				+
			
 
				+            # 获取配置时区的当前时间
			
 
				+            now_str = self._get_configured_time().strftime("%Y-%m-%d %H:%M:%S")
			
 
				+
			
 
				+            # 首先同步平台信息到 platforms 表
			
 
				+            for source_id, source_name in data.id_to_name.items():
			
 
				+                cursor.execute("""
			
 
				+                    INSERT INTO platforms (id, name, updated_at)
			
 
				+                    VALUES (?, ?, ?)
			
 
				+                    ON CONFLICT(id) DO UPDATE SET
			
 
				+                        name = excluded.name,
			
 
				+                        updated_at = excluded.updated_at
			
 
				+                """, (source_id, source_name, now_str))
			
 
				+
			
 
				+            # 统计计数器
			
 
				+            new_count = 0
			
 
				+            updated_count = 0
			
 
				+            title_changed_count = 0
			
 
				+            success_sources = []
			
 
				+
			
 
				+            for source_id, news_list in data.items.items():
			
 
				+                success_sources.append(source_id)
			
 
				+
			
 
				+                for item in news_list:
			
 
				+                    try:
			
 
				+                        # 检查是否已存在（通过 URL + platform_id）
			
 
				+                        if item.url:
			
 
				+                            cursor.execute("""
			
 
				+                                SELECT id, title FROM news_items
			
 
				+                                WHERE url = ? AND platform_id = ?
			
 
				+                            """, (item.url, source_id))
			
 
				+                            existing = cursor.fetchone()
			
 
				+
			
 
				+                            if existing:
			
 
				+                                # 已存在，更新记录
			
 
				+                                existing_id, existing_title = existing
			
 
				+
			
 
				+                                # 检查标题是否变化
			
 
				+                                if existing_title != item.title:
			
 
				+                                    # 记录标题变更
			
 
				+                                    cursor.execute("""
			
 
				+                                        INSERT INTO title_changes
			
 
				+                                        (news_item_id, old_title, new_title, changed_at)
			
 
				+                                        VALUES (?, ?, ?, ?)
			
 
				+                                    """, (existing_id, existing_title, item.title, now_str))
			
 
				+                                    title_changed_count += 1
			
 
				+
			
 
				+                                # 记录排名历史
			
 
				+                                cursor.execute("""
			
 
				+                                    INSERT INTO rank_history
			
 
				+                                    (news_item_id, rank, crawl_time, created_at)
			
 
				+                                    VALUES (?, ?, ?, ?)
			
 
				+                                """, (existing_id, item.rank, data.crawl_time, now_str))
			
 
				+
			
 
				+                                # 更新现有记录
			
 
				+                                cursor.execute("""
			
 
				+                                    UPDATE news_items SET
			
 
				+                                        title = ?,
			
 
				+                                        rank = ?,
			
 
				+                                        mobile_url = ?,
			
 
				+                                        last_crawl_time = ?,
			
 
				+                                        crawl_count = crawl_count + 1,
			
 
				+                                        updated_at = ?
			
 
				+                                    WHERE id = ?
			
 
				+                                """, (item.title, item.rank, item.mobile_url,
			
 
				+                                      data.crawl_time, now_str, existing_id))
			
 
				+                                updated_count += 1
			
 
				+                            else:
			
 
				+                                # 不存在，插入新记录
			
 
				+                                cursor.execute("""
			
 
				+                                    INSERT INTO news_items
			
 
				+                                    (title, platform_id, rank, url, mobile_url,
			
 
				+                                     first_crawl_time, last_crawl_time, crawl_count,
			
 
				+                                     created_at, updated_at)
			
 
				+                                    VALUES (?, ?, ?, ?, ?, ?, ?, 1, ?, ?)
			
 
				+                                """, (item.title, source_id, item.rank, item.url,
			
 
				+                                      item.mobile_url, data.crawl_time, data.crawl_time,
			
 
				+                                      now_str, now_str))
			
 
				+                                new_id = cursor.lastrowid
			
 
				+                                # 记录初始排名
			
 
				+                                cursor.execute("""
			
 
				+                                    INSERT INTO rank_history
			
 
				+                                    (news_item_id, rank, crawl_time, created_at)
			
 
				+                                    VALUES (?, ?, ?, ?)
			
 
				+                                """, (new_id, item.rank, data.crawl_time, now_str))
			
 
				+                                new_count += 1
			
 
				+                        else:
			
 
				+                            # URL 为空的情况，直接插入（不做去重）
			
 
				+                            cursor.execute("""
			
 
				+                                INSERT INTO news_items
			
 
				+                                (title, platform_id, rank, url, mobile_url,
			
 
				+                                 first_crawl_time, last_crawl_time, crawl_count,
			
 
				+                                 created_at, updated_at)
			
 
				+                                VALUES (?, ?, ?, ?, ?, ?, ?, 1, ?, ?)
			
 
				+                            """, (item.title, source_id, item.rank, item.url,
			
 
				+                                  item.mobile_url, data.crawl_time, data.crawl_time,
			
 
				+                                  now_str, now_str))
			
 
				+                            new_id = cursor.lastrowid
			
 
				+                            # 记录初始排名
			
 
				+                            cursor.execute("""
			
 
				+                                INSERT INTO rank_history
			
 
				+                                (news_item_id, rank, crawl_time, created_at)
			
 
				+                                VALUES (?, ?, ?, ?)
			
 
				+                            """, (new_id, item.rank, data.crawl_time, now_str))
			
 
				+                            new_count += 1
			
 
				+
			
 
				+                    except sqlite3.Error as e:
			
 
				+                        print(f"[远程存储] 保存新闻条目失败 [{item.title[:30]}...]: {e}")
			
 
				+
			
 
				+            total_items = new_count + updated_count
			
 
				+
			
 
				+            # 记录抓取信息
			
 
				+            cursor.execute("""
			
 
				+                INSERT OR REPLACE INTO crawl_records
			
 
				+                (crawl_time, total_items, created_at)
			
 
				+                VALUES (?, ?, ?)
			
 
				+            """, (data.crawl_time, total_items, now_str))
			
 
				+
			
 
				+            # 获取刚插入的 crawl_record 的 ID
			
 
				+            cursor.execute("""
			
 
				+                SELECT id FROM crawl_records WHERE crawl_time = ?
			
 
				+            """, (data.crawl_time,))
			
 
				+            record_row = cursor.fetchone()
			
 
				+            if record_row:
			
 
				+                crawl_record_id = record_row[0]
			
 
				+
			
 
				+                # 记录成功的来源
			
 
				+                for source_id in success_sources:
			
 
				+                    cursor.execute("""
			
 
				+                        INSERT OR REPLACE INTO crawl_source_status
			
 
				+                        (crawl_record_id, platform_id, status)
			
 
				+                        VALUES (?, ?, 'success')
			
 
				+                    """, (crawl_record_id, source_id))
			
 
				+
			
 
				+                # 记录失败的来源
			
 
				+                for failed_id in data.failed_ids:
			
 
				+                    # 确保失败的平台也在 platforms 表中
			
 
				+                    cursor.execute("""
			
 
				+                        INSERT OR IGNORE INTO platforms (id, name, updated_at)
			
 
				+                        VALUES (?, ?, ?)
			
 
				+                    """, (failed_id, failed_id, now_str))
			
 
				+
			
 
				+                    cursor.execute("""
			
 
				+                        INSERT OR REPLACE INTO crawl_source_status
			
 
				+                        (crawl_record_id, platform_id, status)
			
 
				+                        VALUES (?, ?, 'failed')
			
 
				+                    """, (crawl_record_id, failed_id))
			
 
				+
			
 
				+            conn.commit()
			
 
				+
			
 
				+            # 查询合并后的总记录数
			
 
				+            cursor.execute("SELECT COUNT(*) as count FROM news_items")
			
 
				+            row = cursor.fetchone()
			
 
				+            final_count = row[0] if row else 0
			
 
				+
			
 
				+            # 输出详细的存储统计日志
			
 
				+            log_parts = [f"[远程存储] 处理完成：新增 {new_count} 条"]
			
 
				+            if updated_count > 0:
			
 
				+                log_parts.append(f"更新 {updated_count} 条")
			
 
				+            if title_changed_count > 0:
			
 
				+                log_parts.append(f"标题变更 {title_changed_count} 条")
			
 
				+            log_parts.append(f"(去重后总计: {final_count} 条)")
			
 
				+            print("，".join(log_parts))
			
 
				+
			
 
				+            # 上传到 R2
			
 
				+            if self._upload_sqlite(data.date):
			
 
				+                print(f"[远程存储] 数据已同步到 R2")
			
 
				+                return True
			
 
				+            else:
			
 
				+                print(f"[远程存储] 上传 R2 失败")
			
 
				+                return False
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[远程存储] 保存失败: {e}")
			
 
				+            return False
			
 
				+
			
 
				+    def get_today_all_data(self, date: Optional[str] = None) -> Optional[NewsData]:
			
 
				+        """获取指定日期的所有新闻数据（合并后）"""
			
 
				+        try:
			
 
				+            conn = self._get_connection(date)
			
 
				+            cursor = conn.cursor()
			
 
				+
			
 
				+            # 获取所有新闻数据（包含 id 用于查询排名历史）
			
 
				+            cursor.execute("""
			
 
				+                SELECT n.id, n.title, n.platform_id, p.name as platform_name,
			
 
				+                       n.rank, n.url, n.mobile_url,
			
 
				+                       n.first_crawl_time, n.last_crawl_time, n.crawl_count
			
 
				+                FROM news_items n
			
 
				+                LEFT JOIN platforms p ON n.platform_id = p.id
			
 
				+                ORDER BY n.platform_id, n.last_crawl_time
			
 
				+            """)
			
 
				+
			
 
				+            rows = cursor.fetchall()
			
 
				+            if not rows:
			
 
				+                return None
			
 
				+
			
 
				+            # 收集所有 news_item_id
			
 
				+            news_ids = [row[0] for row in rows]
			
 
				+
			
 
				+            # 批量查询排名历史
			
 
				+            rank_history_map: Dict[int, List[int]] = {}
			
 
				+            if news_ids:
			
 
				+                placeholders = ",".join("?" * len(news_ids))
			
 
				+                cursor.execute(f"""
			
 
				+                    SELECT news_item_id, rank FROM rank_history
			
 
				+                    WHERE news_item_id IN ({placeholders})
			
 
				+                    ORDER BY news_item_id, crawl_time
			
 
				+                """, news_ids)
			
 
				+                for rh_row in cursor.fetchall():
			
 
				+                    news_id, rank = rh_row[0], rh_row[1]
			
 
				+                    if news_id not in rank_history_map:
			
 
				+                        rank_history_map[news_id] = []
			
 
				+                    if rank not in rank_history_map[news_id]:
			
 
				+                        rank_history_map[news_id].append(rank)
			
 
				+
			
 
				+            # 按 platform_id 分组
			
 
				+            items: Dict[str, List[NewsItem]] = {}
			
 
				+            id_to_name: Dict[str, str] = {}
			
 
				+            crawl_date = self._format_date_folder(date)
			
 
				+
			
 
				+            for row in rows:
			
 
				+                news_id = row[0]
			
 
				+                platform_id = row[2]
			
 
				+                title = row[1]
			
 
				+                platform_name = row[3] or platform_id
			
 
				+
			
 
				+                id_to_name[platform_id] = platform_name
			
 
				+
			
 
				+                if platform_id not in items:
			
 
				+                    items[platform_id] = []
			
 
				+
			
 
				+                # 获取排名历史，如果没有则使用当前排名
			
 
				+                ranks = rank_history_map.get(news_id, [row[4]])
			
 
				+
			
 
				+                items[platform_id].append(NewsItem(
			
 
				+                    title=title,
			
 
				+                    source_id=platform_id,
			
 
				+                    source_name=platform_name,
			
 
				+                    rank=row[4],
			
 
				+                    url=row[5] or "",
			
 
				+                    mobile_url=row[6] or "",
			
 
				+                    crawl_time=row[8],  # last_crawl_time
			
 
				+                    ranks=ranks,
			
 
				+                    first_time=row[7],  # first_crawl_time
			
 
				+                    last_time=row[8],   # last_crawl_time
			
 
				+                    count=row[9],       # crawl_count
			
 
				+                ))
			
 
				+
			
 
				+            final_items = items
			
 
				+
			
 
				+            # 获取失败的来源
			
 
				+            cursor.execute("""
			
 
				+                SELECT DISTINCT css.platform_id
			
 
				+                FROM crawl_source_status css
			
 
				+                JOIN crawl_records cr ON css.crawl_record_id = cr.id
			
 
				+                WHERE css.status = 'failed'
			
 
				+            """)
			
 
				+            failed_ids = [row[0] for row in cursor.fetchall()]
			
 
				+
			
 
				+            # 获取最新的抓取时间
			
 
				+            cursor.execute("""
			
 
				+                SELECT crawl_time FROM crawl_records
			
 
				+                ORDER BY crawl_time DESC
			
 
				+                LIMIT 1
			
 
				+            """)
			
 
				+
			
 
				+            time_row = cursor.fetchone()
			
 
				+            crawl_time = time_row[0] if time_row else self._format_time_filename()
			
 
				+
			
 
				+            return NewsData(
			
 
				+                date=crawl_date,
			
 
				+                crawl_time=crawl_time,
			
 
				+                items=final_items,
			
 
				+                id_to_name=id_to_name,
			
 
				+                failed_ids=failed_ids,
			
 
				+            )
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[远程存储] 读取数据失败: {e}")
			
 
				+            return None
			
 
				+
			
 
				+    def get_latest_crawl_data(self, date: Optional[str] = None) -> Optional[NewsData]:
			
 
				+        """获取最新一次抓取的数据"""
			
 
				+        try:
			
 
				+            conn = self._get_connection(date)
			
 
				+            cursor = conn.cursor()
			
 
				+
			
 
				+            # 获取最新的抓取时间
			
 
				+            cursor.execute("""
			
 
				+                SELECT crawl_time FROM crawl_records
			
 
				+                ORDER BY crawl_time DESC
			
 
				+                LIMIT 1
			
 
				+            """)
			
 
				+
			
 
				+            time_row = cursor.fetchone()
			
 
				+            if not time_row:
			
 
				+                return None
			
 
				+
			
 
				+            latest_time = time_row[0]
			
 
				+
			
 
				+            # 获取该时间的新闻数据，通过 JOIN 获取平台名称
			
 
				+            cursor.execute("""
			
 
				+                SELECT n.title, n.platform_id, p.name as platform_name,
			
 
				+                       n.rank, n.url, n.mobile_url,
			
 
				+                       n.first_crawl_time, n.last_crawl_time, n.crawl_count
			
 
				+                FROM news_items n
			
 
				+                LEFT JOIN platforms p ON n.platform_id = p.id
			
 
				+                WHERE n.last_crawl_time = ?
			
 
				+            """, (latest_time,))
			
 
				+
			
 
				+            rows = cursor.fetchall()
			
 
				+            if not rows:
			
 
				+                return None
			
 
				+
			
 
				+            items: Dict[str, List[NewsItem]] = {}
			
 
				+            id_to_name: Dict[str, str] = {}
			
 
				+            crawl_date = self._format_date_folder(date)
			
 
				+
			
 
				+            for row in rows:
			
 
				+                platform_id = row[1]
			
 
				+                platform_name = row[2] or platform_id
			
 
				+                id_to_name[platform_id] = platform_name
			
 
				+
			
 
				+                if platform_id not in items:
			
 
				+                    items[platform_id] = []
			
 
				+
			
 
				+                items[platform_id].append(NewsItem(
			
 
				+                    title=row[0],
			
 
				+                    source_id=platform_id,
			
 
				+                    source_name=platform_name,
			
 
				+                    rank=row[3],
			
 
				+                    url=row[4] or "",
			
 
				+                    mobile_url=row[5] or "",
			
 
				+                    crawl_time=row[7],  # last_crawl_time
			
 
				+                    ranks=[row[3]],
			
 
				+                    first_time=row[6],  # first_crawl_time
			
 
				+                    last_time=row[7],   # last_crawl_time
			
 
				+                    count=row[8],       # crawl_count
			
 
				+                ))
			
 
				+
			
 
				+            # 获取失败的来源（针对最新一次抓取）
			
 
				+            cursor.execute("""
			
 
				+                SELECT css.platform_id
			
 
				+                FROM crawl_source_status css
			
 
				+                JOIN crawl_records cr ON css.crawl_record_id = cr.id
			
 
				+                WHERE cr.crawl_time = ? AND css.status = 'failed'
			
 
				+            """, (latest_time,))
			
 
				+
			
 
				+            failed_ids = [row[0] for row in cursor.fetchall()]
			
 
				+
			
 
				+            return NewsData(
			
 
				+                date=crawl_date,
			
 
				+                crawl_time=latest_time,
			
 
				+                items=items,
			
 
				+                id_to_name=id_to_name,
			
 
				+                failed_ids=failed_ids,
			
 
				+            )
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[远程存储] 获取最新数据失败: {e}")
			
 
				+            return None
			
 
				+
			
 
				+    def detect_new_titles(self, current_data: NewsData) -> Dict[str, Dict]:
			
 
				+        """检测新增的标题"""
			
 
				+        try:
			
 
				+            historical_data = self.get_today_all_data(current_data.date)
			
 
				+
			
 
				+            if not historical_data:
			
 
				+                new_titles = {}
			
 
				+                for source_id, news_list in current_data.items.items():
			
 
				+                    new_titles[source_id] = {item.title: item for item in news_list}
			
 
				+                return new_titles
			
 
				+
			
 
				+            historical_titles: Dict[str, set] = {}
			
 
				+            for source_id, news_list in historical_data.items.items():
			
 
				+                historical_titles[source_id] = {item.title for item in news_list}
			
 
				+
			
 
				+            new_titles = {}
			
 
				+            for source_id, news_list in current_data.items.items():
			
 
				+                hist_set = historical_titles.get(source_id, set())
			
 
				+                for item in news_list:
			
 
				+                    if item.title not in hist_set:
			
 
				+                        if source_id not in new_titles:
			
 
				+                            new_titles[source_id] = {}
			
 
				+                        new_titles[source_id][item.title] = item
			
 
				+
			
 
				+            return new_titles
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[远程存储] 检测新标题失败: {e}")
			
 
				+            return {}
			
 
				+
			
 
				+    def save_txt_snapshot(self, data: NewsData) -> Optional[str]:
			
 
				+        """保存 TXT 快照（R2 模式下默认不支持）"""
			
 
				+        if not self.enable_txt:
			
 
				+            return None
			
 
				+
			
 
				+        # 如果启用，保存到本地临时目录
			
 
				+        try:
			
 
				+            date_folder = self._format_date_folder(data.date)
			
 
				+            txt_dir = self.temp_dir / date_folder / "txt"
			
 
				+            txt_dir.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+            file_path = txt_dir / f"{data.crawl_time}.txt"
			
 
				+
			
 
				+            with open(file_path, "w", encoding="utf-8") as f:
			
 
				+                for source_id, news_list in data.items.items():
			
 
				+                    source_name = data.id_to_name.get(source_id, source_id)
			
 
				+
			
 
				+                    if source_name and source_name != source_id:
			
 
				+                        f.write(f"{source_id} | {source_name}\n")
			
 
				+                    else:
			
 
				+                        f.write(f"{source_id}\n")
			
 
				+
			
 
				+                    sorted_news = sorted(news_list, key=lambda x: x.rank)
			
 
				+
			
 
				+                    for item in sorted_news:
			
 
				+                        line = f"{item.rank}. {item.title}"
			
 
				+                        if item.url:
			
 
				+                            line += f" [URL:{item.url}]"
			
 
				+                        if item.mobile_url:
			
 
				+                            line += f" [MOBILE:{item.mobile_url}]"
			
 
				+                        f.write(line + "\n")
			
 
				+
			
 
				+                    f.write("\n")
			
 
				+
			
 
				+                if data.failed_ids:
			
 
				+                    f.write("==== 以下ID请求失败 ====\n")
			
 
				+                    for failed_id in data.failed_ids:
			
 
				+                        f.write(f"{failed_id}\n")
			
 
				+
			
 
				+            print(f"[远程存储] TXT 快照已保存: {file_path}")
			
 
				+            return str(file_path)
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[远程存储] 保存 TXT 快照失败: {e}")
			
 
				+            return None
			
 
				+
			
 
				+    def save_html_report(self, html_content: str, filename: str, is_summary: bool = False) -> Optional[str]:
			
 
				+        """保存 HTML 报告到临时目录"""
			
 
				+        if not self.enable_html:
			
 
				+            return None
			
 
				+
			
 
				+        try:
			
 
				+            date_folder = self._format_date_folder()
			
 
				+            html_dir = self.temp_dir / date_folder / "html"
			
 
				+            html_dir.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+            file_path = html_dir / filename
			
 
				+
			
 
				+            with open(file_path, "w", encoding="utf-8") as f:
			
 
				+                f.write(html_content)
			
 
				+
			
 
				+            print(f"[远程存储] HTML 报告已保存: {file_path}")
			
 
				+            return str(file_path)
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[远程存储] 保存 HTML 报告失败: {e}")
			
 
				+            return None
			
 
				+
			
 
				+    def is_first_crawl_today(self, date: Optional[str] = None) -> bool:
			
 
				+        """检查是否是当天第一次抓取"""
			
 
				+        try:
			
 
				+            conn = self._get_connection(date)
			
 
				+            cursor = conn.cursor()
			
 
				+
			
 
				+            cursor.execute("""
			
 
				+                SELECT COUNT(*) as count FROM crawl_records
			
 
				+            """)
			
 
				+
			
 
				+            row = cursor.fetchone()
			
 
				+            count = row[0] if row else 0
			
 
				+
			
 
				+            return count <= 1
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[远程存储] 检查首次抓取失败: {e}")
			
 
				+            return True
			
 
				+
			
 
				+    def cleanup(self) -> None:
			
 
				+        """清理资源（关闭连接和删除临时文件）"""
			
 
				+        # 检查 Python 是否正在关闭
			
 
				+        if sys.meta_path is None:
			
 
				+            return
			
 
				+
			
 
				+        # 关闭数据库连接
			
 
				+        db_connections = getattr(self, "_db_connections", {})
			
 
				+        for db_path, conn in list(db_connections.items()):
			
 
				+            try:
			
 
				+                conn.close()
			
 
				+                print(f"[远程存储] 关闭数据库连接: {db_path}")
			
 
				+            except Exception as e:
			
 
				+                print(f"[远程存储] 关闭连接失败 {db_path}: {e}")
			
 
				+
			
 
				+        if db_connections:
			
 
				+            db_connections.clear()
			
 
				+
			
 
				+        # 删除临时目录
			
 
				+        temp_dir = getattr(self, "temp_dir", None)
			
 
				+        if temp_dir:
			
 
				+            try:
			
 
				+                if temp_dir.exists():
			
 
				+                    shutil.rmtree(temp_dir)
			
 
				+                    print(f"[远程存储] 临时目录已清理: {temp_dir}")
			
 
				+            except Exception as e:
			
 
				+                # 忽略 Python 关闭时的错误
			
 
				+                if sys.meta_path is not None:
			
 
				+                    print(f"[远程存储] 清理临时目录失败: {e}")
			
 
				+
			
 
				+        downloaded_files = getattr(self, "_downloaded_files", None)
			
 
				+        if downloaded_files:
			
 
				+            downloaded_files.clear()
			
 
				+
			
 
				+    def cleanup_old_data(self, retention_days: int) -> int:
			
 
				+        """
			
 
				+        清理 R2 上的过期数据
			
 
				+
			
 
				+        Args:
			
 
				+            retention_days: 保留天数（0 表示不清理）
			
 
				+
			
 
				+        Returns:
			
 
				+            删除的数据库文件数量
			
 
				+        """
			
 
				+        if retention_days <= 0:
			
 
				+            return 0
			
 
				+
			
 
				+        deleted_count = 0
			
 
				+        cutoff_date = self._get_configured_time() - timedelta(days=retention_days)
			
 
				+
			
 
				+        try:
			
 
				+            # 列出 R2 中 news/ 前缀下的所有对象
			
 
				+            paginator = self.s3_client.get_paginator('list_objects_v2')
			
 
				+            pages = paginator.paginate(Bucket=self.bucket_name, Prefix="news/")
			
 
				+
			
 
				+            # 收集需要删除的对象键
			
 
				+            objects_to_delete = []
			
 
				+            deleted_dates = set()
			
 
				+
			
 
				+            for page in pages:
			
 
				+                if 'Contents' not in page:
			
 
				+                    continue
			
 
				+
			
 
				+                for obj in page['Contents']:
			
 
				+                    key = obj['Key']
			
 
				+
			
 
				+                    # 解析日期（格式: news/YYYY-MM-DD.db 或 news/YYYY年MM月DD日.db）
			
 
				+                    folder_date = None
			
 
				+                    try:
			
 
				+                        # ISO 格式: news/YYYY-MM-DD.db
			
 
				+                        date_match = re.match(r'news/(\d{4})-(\d{2})-(\d{2})\.db$', key)
			
 
				+                        if date_match:
			
 
				+                            folder_date = datetime(
			
 
				+                                int(date_match.group(1)),
			
 
				+                                int(date_match.group(2)),
			
 
				+                                int(date_match.group(3)),
			
 
				+                                tzinfo=pytz.timezone("Asia/Shanghai")
			
 
				+                            )
			
 
				+                            date_str = f"{date_match.group(1)}-{date_match.group(2)}-{date_match.group(3)}"
			
 
				+                        else:
			
 
				+                            # 旧中文格式: news/YYYY年MM月DD日.db
			
 
				+                            date_match = re.match(r'news/(\d{4})年(\d{2})月(\d{2})日\.db$', key)
			
 
				+                            if date_match:
			
 
				+                                folder_date = datetime(
			
 
				+                                    int(date_match.group(1)),
			
 
				+                                    int(date_match.group(2)),
			
 
				+                                    int(date_match.group(3)),
			
 
				+                                    tzinfo=pytz.timezone("Asia/Shanghai")
			
 
				+                                )
			
 
				+                                date_str = f"{date_match.group(1)}年{date_match.group(2)}月{date_match.group(3)}日"
			
 
				+                    except Exception:
			
 
				+                        continue
			
 
				+
			
 
				+                    if folder_date and folder_date < cutoff_date:
			
 
				+                        objects_to_delete.append({'Key': key})
			
 
				+                        deleted_dates.add(date_str)
			
 
				+
			
 
				+            # 批量删除对象（每次最多 1000 个）
			
 
				+            if objects_to_delete:
			
 
				+                batch_size = 1000
			
 
				+                for i in range(0, len(objects_to_delete), batch_size):
			
 
				+                    batch = objects_to_delete[i:i + batch_size]
			
 
				+                    try:
			
 
				+                        self.s3_client.delete_objects(
			
 
				+                            Bucket=self.bucket_name,
			
 
				+                            Delete={'Objects': batch}
			
 
				+                        )
			
 
				+                        print(f"[远程存储] 删除 {len(batch)} 个对象")
			
 
				+                    except Exception as e:
			
 
				+                        print(f"[远程存储] 批量删除失败: {e}")
			
 
				+
			
 
				+                deleted_count = len(deleted_dates)
			
 
				+                for date_str in sorted(deleted_dates):
			
 
				+                    print(f"[远程存储] 清理过期数据: news/{date_str}.db")
			
 
				+
			
 
				+                print(f"[远程存储] 共清理 {deleted_count} 个过期日期数据库文件")
			
 
				+
			
 
				+            return deleted_count
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[远程存储] 清理过期数据失败: {e}")
			
 
				+            return deleted_count
			
 
				+
			
 
				+    def has_pushed_today(self, date: Optional[str] = None) -> bool:
			
 
				+        """
			
 
				+        检查指定日期是否已推送过
			
 
				+
			
 
				+        Args:
			
 
				+            date: 日期字符串（YYYY-MM-DD），默认为今天
			
 
				+
			
 
				+        Returns:
			
 
				+            是否已推送
			
 
				+        """
			
 
				+        try:
			
 
				+            conn = self._get_connection(date)
			
 
				+            cursor = conn.cursor()
			
 
				+
			
 
				+            target_date = self._format_date_folder(date)
			
 
				+
			
 
				+            cursor.execute("""
			
 
				+                SELECT pushed FROM push_records WHERE date = ?
			
 
				+            """, (target_date,))
			
 
				+
			
 
				+            row = cursor.fetchone()
			
 
				+            if row:
			
 
				+                return bool(row[0])
			
 
				+            return False
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[远程存储] 检查推送记录失败: {e}")
			
 
				+            return False
			
 
				+
			
 
				+    def record_push(self, report_type: str, date: Optional[str] = None) -> bool:
			
 
				+        """
			
 
				+        记录推送
			
 
				+
			
 
				+        Args:
			
 
				+            report_type: 报告类型
			
 
				+            date: 日期字符串（YYYY-MM-DD），默认为今天
			
 
				+
			
 
				+        Returns:
			
 
				+            是否记录成功
			
 
				+        """
			
 
				+        try:
			
 
				+            conn = self._get_connection(date)
			
 
				+            cursor = conn.cursor()
			
 
				+
			
 
				+            target_date = self._format_date_folder(date)
			
 
				+            now_str = self._get_configured_time().strftime("%Y-%m-%d %H:%M:%S")
			
 
				+
			
 
				+            cursor.execute("""
			
 
				+                INSERT INTO push_records (date, pushed, push_time, report_type, created_at)
			
 
				+                VALUES (?, 1, ?, ?, ?)
			
 
				+                ON CONFLICT(date) DO UPDATE SET
			
 
				+                    pushed = 1,
			
 
				+                    push_time = excluded.push_time,
			
 
				+                    report_type = excluded.report_type
			
 
				+            """, (target_date, now_str, report_type, now_str))
			
 
				+
			
 
				+            conn.commit()
			
 
				+
			
 
				+            print(f"[远程存储] 推送记录已保存: {report_type} at {now_str}")
			
 
				+
			
 
				+            # 上传到 R2 确保记录持久化
			
 
				+            if self._upload_sqlite(date):
			
 
				+                print(f"[远程存储] 推送记录已同步到 R2")
			
 
				+                return True
			
 
				+            else:
			
 
				+                print(f"[远程存储] 推送记录同步到 R2 失败")
			
 
				+                return False
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[远程存储] 记录推送失败: {e}")
			
 
				+            return False
			
 
				+
			
 
				+    def __del__(self):
			
 
				+        """析构函数"""
			
 
				+        # 检查 Python 是否正在关闭
			
 
				+        if sys.meta_path is None:
			
 
				+            return
			
 
				+        try:
			
 
				+            self.cleanup()
			
 
				+        except Exception:
			
 
				+            # Python 关闭时可能会出错，忽略即可
			
 
				+            pass
			
 
				+
			
 
				+    def pull_recent_days(self, days: int, local_data_dir: str = "output") -> int:
			
 
				+        """
			
 
				+        从远程拉取最近 N 天的数据到本地
			
 
				+
			
 
				+        Args:
			
 
				+            days: 拉取天数
			
 
				+            local_data_dir: 本地数据目录
			
 
				+
			
 
				+        Returns:
			
 
				+            成功拉取的数据库文件数量
			
 
				+        """
			
 
				+        if days <= 0:
			
 
				+            return 0
			
 
				+
			
 
				+        local_dir = Path(local_data_dir)
			
 
				+        local_dir.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+        pulled_count = 0
			
 
				+        now = self._get_configured_time()
			
 
				+
			
 
				+        print(f"[远程存储] 开始拉取最近 {days} 天的数据...")
			
 
				+
			
 
				+        for i in range(days):
			
 
				+            date = now - timedelta(days=i)
			
 
				+            date_str = date.strftime("%Y-%m-%d")
			
 
				+
			
 
				+            # 本地目标路径
			
 
				+            local_date_dir = local_dir / date_str
			
 
				+            local_db_path = local_date_dir / "news.db"
			
 
				+
			
 
				+            # 如果本地已存在，跳过
			
 
				+            if local_db_path.exists():
			
 
				+                print(f"[远程存储] 跳过（本地已存在）: {date_str}")
			
 
				+                continue
			
 
				+
			
 
				+            # 远程对象键
			
 
				+            remote_key = f"news/{date_str}.db"
			
 
				+
			
 
				+            # 检查远程是否存在
			
 
				+            if not self._check_object_exists(remote_key):
			
 
				+                print(f"[远程存储] 跳过（远程不存在）: {date_str}")
			
 
				+                continue
			
 
				+
			
 
				+            # 下载
			
 
				+            try:
			
 
				+                local_date_dir.mkdir(parents=True, exist_ok=True)
			
 
				+                self.s3_client.download_file(
			
 
				+                    self.bucket_name,
			
 
				+                    remote_key,
			
 
				+                    str(local_db_path)
			
 
				+                )
			
 
				+                print(f"[远程存储] 已拉取: {remote_key} -> {local_db_path}")
			
 
				+                pulled_count += 1
			
 
				+            except Exception as e:
			
 
				+                print(f"[远程存储] 拉取失败 ({date_str}): {e}")
			
 
				+
			
 
				+        print(f"[远程存储] 拉取完成，共下载 {pulled_count} 个数据库文件")
			
 
				+        return pulled_count
			
 
				+
			
 
				+    def list_remote_dates(self) -> List[str]:
			
 
				+        """
			
 
				+        列出远程存储中所有可用的日期
			
 
				+
			
 
				+        Returns:
			
 
				+            日期字符串列表（YYYY-MM-DD 格式）
			
 
				+        """
			
 
				+        dates = []
			
 
				+
			
 
				+        try:
			
 
				+            paginator = self.s3_client.get_paginator('list_objects_v2')
			
 
				+            pages = paginator.paginate(Bucket=self.bucket_name, Prefix="news/")
			
 
				+
			
 
				+            for page in pages:
			
 
				+                if 'Contents' not in page:
			
 
				+                    continue
			
 
				+
			
 
				+                for obj in page['Contents']:
			
 
				+                    key = obj['Key']
			
 
				+                    # 解析日期
			
 
				+                    date_match = re.match(r'news/(\d{4}-\d{2}-\d{2})\.db$', key)
			
 
				+                    if date_match:
			
 
				+                        dates.append(date_match.group(1))
			
 
				+
			
 
				+            return sorted(dates, reverse=True)
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"[远程存储] 列出远程日期失败: {e}")
			
 
				+            return []
			
--- a/trendradar/storage/schema.sql
+++ b/trendradar/storage/schema.sql
@@ -0,0 +1,117 @@
 
				+-- TrendRadar database schema
				+
				+-- ============================================
				+-- Platform table
				+-- Key idea: id is stable, name may change
				+-- ============================================
				+CREATE TABLE IF NOT EXISTS platforms (
				+    id TEXT PRIMARY KEY,
				+    name TEXT NOT NULL,
				+    is_active INTEGER DEFAULT 1,
				+    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
				+);
				+
				+-- ============================================
				+-- News item table
				+-- Uniquely identified by URL + platform_id, enabling de-duplicated storage
				+-- ============================================
				+CREATE TABLE IF NOT EXISTS news_items (
				+    id INTEGER PRIMARY KEY AUTOINCREMENT,
				+    title TEXT NOT NULL,
				+    platform_id TEXT NOT NULL,
				+    rank INTEGER NOT NULL,
				+    url TEXT DEFAULT '',
				+    mobile_url TEXT DEFAULT '',
				+    first_crawl_time TEXT NOT NULL,      -- time of first crawl
				+    last_crawl_time TEXT NOT NULL,       -- time of most recent crawl
				+    crawl_count INTEGER DEFAULT 1,       -- number of crawls
				+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
				+    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
				+    FOREIGN KEY (platform_id) REFERENCES platforms(id)
				+);
				+
				+-- ============================================
				+-- Title change history table
				+-- Records title changes under the same URL
				+-- ============================================
				+CREATE TABLE IF NOT EXISTS title_changes (
				+    id INTEGER PRIMARY KEY AUTOINCREMENT,
				+    news_item_id INTEGER NOT NULL,
				+    old_title TEXT NOT NULL,
				+    new_title TEXT NOT NULL,
				+    changed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
				+    FOREIGN KEY (news_item_id) REFERENCES news_items(id)
				+);
				+
				+-- ============================================
				+-- Rank history table
				+-- Records the rank observed at each crawl
				+-- ============================================
				+CREATE TABLE IF NOT EXISTS rank_history (
				+    id INTEGER PRIMARY KEY AUTOINCREMENT,
				+    news_item_id INTEGER NOT NULL,
				+    rank INTEGER NOT NULL,
				+    crawl_time TEXT NOT NULL,
				+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
				+    FOREIGN KEY (news_item_id) REFERENCES news_items(id)
				+);
				+
				+-- ============================================
				+-- Crawl record table
				+-- Records the time and item count of each crawl
				+-- ============================================
				+CREATE TABLE IF NOT EXISTS crawl_records (
				+    id INTEGER PRIMARY KEY AUTOINCREMENT,
				+    crawl_time TEXT NOT NULL UNIQUE,
				+    total_items INTEGER DEFAULT 0,
				+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
				+);
				+
				+-- ============================================
				+-- Crawl source status table
				+-- Records per-platform success/failure for each crawl
				+-- ============================================
				+CREATE TABLE IF NOT EXISTS crawl_source_status (
				+    crawl_record_id INTEGER NOT NULL,
				+    platform_id TEXT NOT NULL,
				+    status TEXT NOT NULL CHECK(status IN ('success', 'failed')),
				+    PRIMARY KEY (crawl_record_id, platform_id),
				+    FOREIGN KEY (crawl_record_id) REFERENCES crawl_records(id),
				+    FOREIGN KEY (platform_id) REFERENCES platforms(id)
				+);
				+
				+-- ============================================
				+-- Push record table
				+-- Used by the push_window once_per_day feature
				+-- ============================================
				+CREATE TABLE IF NOT EXISTS push_records (
				+    id INTEGER PRIMARY KEY AUTOINCREMENT,
				+    date TEXT NOT NULL UNIQUE,
				+    pushed INTEGER DEFAULT 0,
				+    push_time TEXT,
				+    report_type TEXT,
				+    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
				+);
				+
				+-- ============================================
				+-- Index definitions
				+-- ============================================
				+
				+-- Platform index
				+CREATE INDEX IF NOT EXISTS idx_news_platform ON news_items(platform_id);
				+
				+-- Time index (for querying the latest data)
				+CREATE INDEX IF NOT EXISTS idx_news_crawl_time ON news_items(last_crawl_time);
				+
				+-- Title index (for title search)
				+CREATE INDEX IF NOT EXISTS idx_news_title ON news_items(title);
				+
				+-- Unique index on URL + platform_id (non-empty URLs only; provides de-duplication)
				+CREATE UNIQUE INDEX IF NOT EXISTS idx_news_url_platform
				+    ON news_items(url, platform_id) WHERE url != '';
				+
				+-- Crawl status index
				+CREATE INDEX IF NOT EXISTS idx_crawl_status_record ON crawl_source_status(crawl_record_id);
				+
				+-- Rank history index
				+CREATE INDEX IF NOT EXISTS idx_rank_history_news ON rank_history(news_item_id);
			
--- a/trendradar/utils/__init__.py
+++ b/trendradar/utils/__init__.py
@@ -0,0 +1,20 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+工具模块 - 公共工具函数
			
 
				+"""
			
 
				+
			
 
				+from trendradar.utils.time import (
			
 
				+    get_configured_time,
			
 
				+    format_date_folder,
			
 
				+    format_time_filename,
			
 
				+    get_current_time_display,
			
 
				+    convert_time_for_display,
			
 
				+)
			
 
				+
			
 
				+__all__ = [
			
 
				+    "get_configured_time",
			
 
				+    "format_date_folder",
			
 
				+    "format_time_filename",
			
 
				+    "get_current_time_display",
			
 
				+    "convert_time_for_display",
			
 
				+]
			
--- a/trendradar/utils/time.py
+++ b/trendradar/utils/time.py
@@ -0,0 +1,91 @@
 
				+# coding=utf-8
			
 
				+"""
			
 
				+时间工具模块 - 统一时间处理函数
			
 
				+"""
			
 
				+
			
 
				+from datetime import datetime
			
 
				+from typing import Optional
			
 
				+
			
 
				+import pytz
			
 
				+
			
 
				+# 默认时区
			
 
				+DEFAULT_TIMEZONE = "Asia/Shanghai"
			
 
				+
			
 
				+
			
 
				+def get_configured_time(timezone: str = DEFAULT_TIMEZONE) -> datetime:
			
 
				+    """
			
 
				+    获取配置时区的当前时间
			
 
				+
			
 
				+    Args:
			
 
				+        timezone: 时区名称，如 'Asia/Shanghai', 'America/Los_Angeles'
			
 
				+
			
 
				+    Returns:
			
 
				+        带时区信息的当前时间
			
 
				+    """
			
 
				+    try:
			
 
				+        tz = pytz.timezone(timezone)
			
 
				+    except pytz.UnknownTimeZoneError:
			
 
				+        print(f"[警告] 未知时区 '{timezone}'，使用默认时区 {DEFAULT_TIMEZONE}")
			
 
				+        tz = pytz.timezone(DEFAULT_TIMEZONE)
			
 
				+    return datetime.now(tz)
			
 
				+
			
 
				+
			
 
				+def format_date_folder(
			
 
				+    date: Optional[str] = None, timezone: str = DEFAULT_TIMEZONE
			
 
				+) -> str:
			
 
				+    """
			
 
				+    格式化日期文件夹名 (ISO 格式: YYYY-MM-DD)
			
 
				+
			
 
				+    Args:
			
 
				+        date: 指定日期字符串，为 None 则使用当前日期
			
 
				+        timezone: 时区名称
			
 
				+
			
 
				+    Returns:
			
 
				+        格式化后的日期字符串，如 '2025-12-09'
			
 
				+    """
			
 
				+    if date:
			
 
				+        return date
			
 
				+    return get_configured_time(timezone).strftime("%Y-%m-%d")
			
 
				+
			
 
				+
			
 
				+def format_time_filename(timezone: str = DEFAULT_TIMEZONE) -> str:
			
 
				+    """
			
 
				+    格式化时间文件名 (格式: HH-MM，用于文件名)
			
 
				+
			
 
				+    Windows 系统不支持冒号作为文件名，因此使用连字符
			
 
				+
			
 
				+    Args:
			
 
				+        timezone: 时区名称
			
 
				+
			
 
				+    Returns:
			
 
				+        格式化后的时间字符串，如 '15-30'
			
 
				+    """
			
 
				+    return get_configured_time(timezone).strftime("%H-%M")
			
 
				+
			
 
				+
			
 
				+def get_current_time_display(timezone: str = DEFAULT_TIMEZONE) -> str:
			
 
				+    """
			
 
				+    获取当前时间显示 (格式: HH:MM，用于显示)
			
 
				+
			
 
				+    Args:
			
 
				+        timezone: 时区名称
			
 
				+
			
 
				+    Returns:
			
 
				+        格式化后的时间字符串，如 '15:30'
			
 
				+    """
			
 
				+    return get_configured_time(timezone).strftime("%H:%M")
			
 
				+
			
 
				+
			
 
				+def convert_time_for_display(time_str: str) -> str:
			
 
				+    """
			
 
				+    将 HH-MM 格式转换为 HH:MM 格式用于显示
			
 
				+
			
 
				+    Args:
			
 
				+        time_str: 输入时间字符串，如 '15-30'
			
 
				+
			
 
				+    Returns:
			
 
				+        转换后的时间字符串，如 '15:30'
			
 
				+    """
			
 
				+    if time_str and "-" in time_str and len(time_str) == 5:
			
 
				+        return time_str.replace("-", ":")
			
 
				+    return time_str
			
--- a/version
+++ b/version
@@ -1 +1 @@
 
				-3.5.0
			
 
				+4.0.0
@@ -1 +1 @@
 				-3.5.0
 				+4.0.0