6 месяцев назад · d14eba178e
--- a/mcp_server/__init__.py
+++ b/mcp_server/__init__.py
@@ -5,4 +5,4 @@ TrendRadar MCP Server
 
															 """
														
 
															-__version__ = "3.1.6"
														
 
															+__version__ = "3.1.7"
														
--- a/mcp_server/tools/analytics.py
+++ b/mcp_server/tools/analytics.py
@@ -4,12 +4,17 @@
 
															 提供热度趋势分析、平台对比、关键词共现、情感分析等高级分析功能。
														
 
															 """
														
 
															+import os
														
 
															 import re
														
 
															 from collections import Counter, defaultdict
														
 
															 from datetime import datetime, timedelta
														
 
															 from typing import Dict, List, Optional, Union
														
 
															 from difflib import SequenceMatcher
														
 
															+import yaml
														
 
															+
														
 
															+from trendradar.core.analyzer import calculate_news_weight as _calculate_news_weight
														
 
															+
														
 
															 from ..services.data_service import DataService
														
 
															 from ..utils.validators import (
														
 
															     validate_platforms,
														
@@ -22,13 +27,43 @@ from ..utils.validators import (
 
															 from ..utils.errors import MCPError, InvalidParameterError, DataNotFoundError
														
 
															+def _get_weight_config() -> Dict:
														
 
															+    """
														
 
															+    从 config.yaml 读取权重配置
														
 
															+
														
 
															+    Returns:
														
 
															+        权重配置字典，包含 RANK_WEIGHT, FREQUENCY_WEIGHT, HOTNESS_WEIGHT
														
 
															+    """
														
 
															+    # 默认值
														
 
															+    default_config = {
														
 
															+        "RANK_WEIGHT": 0.6,
														
 
															+        "FREQUENCY_WEIGHT": 0.3,
														
 
															+        "HOTNESS_WEIGHT": 0.1,
														
 
															+    }
														
 
															+
														
 
															+    try:
														
 
															+        current_dir = os.path.dirname(os.path.abspath(__file__))
														
 
															+        config_path = os.path.join(current_dir, "..", "..", "config", "config.yaml")
														
 
															+        config_path = os.path.normpath(config_path)
														
 
															+
														
 
															+        with open(config_path, 'r', encoding='utf-8') as f:
														
 
															+            config = yaml.safe_load(f)
														
 
															+            weight = config.get('advanced', {}).get('weight', {})
														
 
															+            return {
														
 
															+                "RANK_WEIGHT": weight.get('rank', 0.6),
														
 
															+                "FREQUENCY_WEIGHT": weight.get('frequency', 0.3),
														
 
															+                "HOTNESS_WEIGHT": weight.get('hotness', 0.1),
														
 
															+            }
														
 
															+    except Exception:
														
 
															+        return default_config
														
 
															+
														
 
															+
														
 
															 def calculate_news_weight(news_data: Dict, rank_threshold: int = 5) -> float:
														
 
															     """
														
 
															     计算新闻权重（用于排序）
														
 
															-    - 排名权重 (60%)：新闻在榜单中的排名
														
 
															-    - 频次权重 (30%)：新闻出现的次数
														
 
															-    - 热度权重 (10%)：高排名出现的比例
														
 
															+    复用 trendradar.core.analyzer.calculate_news_weight 实现，
														
 
															+    权重配置从 config.yaml 的 advanced.weight 读取。
														
 
															     Args:
														
 
															         news_data: 新闻数据字典，包含 ranks 和 count 字段
														
@@ -37,41 +72,7 @@ def calculate_news_weight(news_data: Dict, rank_threshold: int = 5) -> float:
 
															     Returns:
														
 
															         权重分数（0-100之间的浮点数）
														
 
															     """
														
 
															-    ranks = news_data.get("ranks", [])
														
 
															-    if not ranks:
														
 
															-        return 0.0
														
 
															-
														
 
															-    count = news_data.get("count", len(ranks))
														
 
															-
														
 
															-    # 权重配置（与 config.yaml 保持一致）
														
 
															-    RANK_WEIGHT = 0.6
														
 
															-    FREQUENCY_WEIGHT = 0.3
														
 
															-    HOTNESS_WEIGHT = 0.1
														
 
															-
														
 
															-    # 1. 排名权重：Σ(11 - min(rank, 10)) / 出现次数
														
 
															-    rank_scores = []
														
 
															-    for rank in ranks:
														
 
															-        score = 11 - min(rank, 10)
														
 
															-        rank_scores.append(score)
														
 
															-
														
 
															-    rank_weight = sum(rank_scores) / len(ranks) if ranks else 0
														
 
															-
														
 
															-    # 2. 频次权重：min(出现次数, 10) × 10
														
 
															-    frequency_weight = min(count, 10) * 10
														
 
															-
														
 
															-    # 3. 热度加成：高排名次数 / 总出现次数 × 100
														
 
															-    high_rank_count = sum(1 for rank in ranks if rank <= rank_threshold)
														
 
															-    hotness_ratio = high_rank_count / len(ranks) if ranks else 0
														
 
															-    hotness_weight = hotness_ratio * 100
														
 
															-
														
 
															-    # 综合权重
														
 
															-    total_weight = (
														
 
															-        rank_weight * RANK_WEIGHT
														
 
															-        + frequency_weight * FREQUENCY_WEIGHT
														
 
															-        + hotness_weight * HOTNESS_WEIGHT
														
 
															-    )
														
 
															-
														
 
															-    return total_weight
														
 
															+    return _calculate_news_weight(news_data, rank_threshold, _get_weight_config())
														
 
															 class AnalyticsTools:
														
@@ -2158,6 +2159,8 @@ class AnalyticsTools:
 
															         """
														
 
															         对新闻列表进行相似度聚合
														
 
															+        使用双层过滤策略：先用 Jaccard 快速粗筛，再用 SequenceMatcher 精确计算
														
 
															+
														
 
															         Args:
														
 
															             news_list: 新闻列表
														
 
															             threshold: 相似度阈值
														
@@ -2169,17 +2172,31 @@ class AnalyticsTools:
 
															         if not news_list:
														
 
															             return []
														
 
															-        # 按权重排序，优先保留高权重新闻作为代表
														
 
															-        sorted_news = sorted(news_list, key=lambda x: x.get("weight", 0), reverse=True)
														
 
															+        # 预计算字符集合用于快速过滤
														
 
															+        prepared_news = []
														
 
															+        for news in news_list:
														
 
															+            char_set = set(news["title"])
														
 
															+            prepared_news.append({
														
 
															+                "data": news,
														
 
															+                "char_set": char_set,
														
 
															+                "set_len": len(char_set)
														
 
															+            })
														
 
															+
														
 
															+        # 按权重排序
														
 
															+        sorted_items = sorted(prepared_news, key=lambda x: x["data"].get("weight", 0), reverse=True)
														
 
															         aggregated = []
														
 
															         used_indices = set()
														
 
															+        PRE_FILTER_RATIO = 0.5  # 粗筛阈值系数
														
 
															-        for i, news in enumerate(sorted_news):
														
 
															+        for i, item in enumerate(sorted_items):
														
 
															             if i in used_indices:
														
 
															                 continue
														
 
															-            # 创建聚合组
														
 
															+            news = item["data"]
														
 
															+            base_set = item["char_set"]
														
 
															+            base_len = item["set_len"]
														
 
															+
														
 
															             group = {
														
 
															                 "representative_title": news["title"],
														
 
															                 "platforms": [news["platform_name"]],
														
@@ -2205,13 +2222,35 @@ class AnalyticsTools:
 
															             used_indices.add(i)
														
 
															             # 查找相似新闻
														
 
															-            for j, other_news in enumerate(sorted_news):
														
 
															+            for j in range(i + 1, len(sorted_items)):
														
 
															                 if j in used_indices:
														
 
															                     continue
														
 
															-                similarity = self._calculate_similarity(news["title"], other_news["title"])
														
 
															+                compare_item = sorted_items[j]
														
 
															+                compare_set = compare_item["char_set"]
														
 
															+                compare_len = compare_item["set_len"]
														
 
															+
														
 
															+                # 快速粗筛：长度检查
														
 
															+                if base_len == 0 or compare_len == 0:
														
 
															+                    continue
														
 
															+
														
 
															+                # 快速粗筛：长度比例检查
														
 
															+                if min(base_len, compare_len) / max(base_len, compare_len) < (threshold * PRE_FILTER_RATIO):
														
 
															+                    continue
														
 
															+
														
 
															+                # 快速粗筛：Jaccard 相似度
														
 
															+                intersection = len(base_set & compare_set)
														
 
															+                union = len(base_set | compare_set)
														
 
															+                jaccard_sim = intersection / union if union > 0 else 0
														
 
															+
														
 
															+                if jaccard_sim < (threshold * PRE_FILTER_RATIO):
														
 
															+                    continue
														
 
															+
														
 
															+                # 精确计算：SequenceMatcher
														
 
															+                other_news = compare_item["data"]
														
 
															+                real_similarity = self._calculate_similarity(news["title"], other_news["title"])
														
 
															-                if similarity >= threshold:
														
 
															+                if real_similarity >= threshold:
														
 
															                     # 合并到当前组
														
 
															                     if other_news["platform_name"] not in group["platforms"]:
														
 
															                         group["platforms"].append(other_news["platform_name"])
														
--- a/mcp_server/tools/system.py
+++ b/mcp_server/tools/system.py
@@ -113,12 +113,18 @@ class SystemManagementTools:
 
															             with open(config_path, "r", encoding="utf-8") as f:
														
 
															                 config_data = yaml.safe_load(f)
														
 
															-            # 获取平台配置
														
 
															-            all_platforms = config_data.get("platforms", [])
														
 
															+            # 获取平台配置（嵌套结构：{enabled: bool, sources: [...]})
														
 
															+            platforms_config = config_data.get("platforms", {})
														
 
															+            if not platforms_config.get("enabled", True):
														
 
															+                raise CrawlTaskError(
														
 
															+                    "热榜平台已禁用",
														
 
															+                    suggestion="请检查 config/config.yaml 中的 platforms.enabled 配置"
														
 
															+                )
														
 
															+            all_platforms = platforms_config.get("sources", [])
														
 
															             if not all_platforms:
														
 
															                 raise CrawlTaskError(
														
 
															                     "配置文件中没有平台配置",
														
 
															-                    suggestion="请检查 config/config.yaml 中的 platforms 配置"
														
 
															+                    suggestion="请检查 config/config.yaml 中的 platforms.sources 配置"
														
 
															                 )
														
 
															             # 过滤平台
														
--- a/mcp_server/utils/validators.py
+++ b/mcp_server/utils/validators.py
@@ -167,8 +167,10 @@ def get_supported_platforms() -> List[str]:
 
															         with open(config_path, 'r', encoding='utf-8') as f:
														
 
															             config = yaml.safe_load(f)
														
 
															-            platforms = config.get('platforms', [])
														
 
															-            return [p['id'] for p in platforms if 'id' in p]
														
 
															+            platforms_config = config.get('platforms', {})
														
 
															+            # 处理嵌套结构：{enabled: bool, sources: [...]}
														
 
															+            sources = platforms_config.get('sources', [])
														
 
															+            return [p['id'] for p in sources if 'id' in p]
														
 
															     except Exception as e:
														
 
															         # 降级方案：返回空列表，允许所有平台
														
 
															         print(f"警告：无法加载平台配置 ({config_path}): {e}")
														
--- a/version_mcp
+++ b/version_mcp
@@ -1 +1 @@
 
															-3.1.6
														
 
															+3.1.7