6 miesięcy temu · d14eba178e
--- a/mcp_server/__init__.py
+++ b/mcp_server/__init__.py
@@ -5,4 +5,4 @@ TrendRadar MCP Server
 
				 
			
 
				 """
			
 
				 
			
 
				-__version__ = "3.1.6"
			
 
				+__version__ = "3.1.7"
			
--- a/mcp_server/tools/analytics.py
+++ b/mcp_server/tools/analytics.py
@@ -4,12 +4,17 @@
 
				 提供热度趋势分析、平台对比、关键词共现、情感分析等高级分析功能。
			
 
				 """
			
 
				 
			
 
				+import os
			
 
				 import re
			
 
				 from collections import Counter, defaultdict
			
 
				 from datetime import datetime, timedelta
			
 
				 from typing import Dict, List, Optional, Union
			
 
				 from difflib import SequenceMatcher
			
 
				 
			
 
				+import yaml
			
 
				+
			
 
				+from trendradar.core.analyzer import calculate_news_weight as _calculate_news_weight
			
 
				+
			
 
				 from ..services.data_service import DataService
			
 
				 from ..utils.validators import (
			
 
				     validate_platforms,
			
@@ -22,13 +27,43 @@ from ..utils.validators import (
 
				 from ..utils.errors import MCPError, InvalidParameterError, DataNotFoundError
			
 
				 
			
 
				 
			
 
				+def _get_weight_config() -> Dict:
			
 
				+    """
			
 
				+    从 config.yaml 读取权重配置
			
 
				+
			
 
				+    Returns:
			
 
				+        权重配置字典，包含 RANK_WEIGHT, FREQUENCY_WEIGHT, HOTNESS_WEIGHT
			
 
				+    """
			
 
				+    # 默认值
			
 
				+    default_config = {
			
 
				+        "RANK_WEIGHT": 0.6,
			
 
				+        "FREQUENCY_WEIGHT": 0.3,
			
 
				+        "HOTNESS_WEIGHT": 0.1,
			
 
				+    }
			
 
				+
			
 
				+    try:
			
 
				+        current_dir = os.path.dirname(os.path.abspath(__file__))
			
 
				+        config_path = os.path.join(current_dir, "..", "..", "config", "config.yaml")
			
 
				+        config_path = os.path.normpath(config_path)
			
 
				+
			
 
				+        with open(config_path, 'r', encoding='utf-8') as f:
			
 
				+            config = yaml.safe_load(f)
			
 
				+            weight = config.get('advanced', {}).get('weight', {})
			
 
				+            return {
			
 
				+                "RANK_WEIGHT": weight.get('rank', 0.6),
			
 
				+                "FREQUENCY_WEIGHT": weight.get('frequency', 0.3),
			
 
				+                "HOTNESS_WEIGHT": weight.get('hotness', 0.1),
			
 
				+            }
			
 
				+    except Exception:
			
 
				+        return default_config
			
 
				+
			
 
				+
			
 
				 def calculate_news_weight(news_data: Dict, rank_threshold: int = 5) -> float:
			
 
				     """
			
 
				     计算新闻权重（用于排序）
			
 
				 
			
 
				-    - 排名权重 (60%)：新闻在榜单中的排名
			
 
				-    - 频次权重 (30%)：新闻出现的次数
			
 
				-    - 热度权重 (10%)：高排名出现的比例
			
 
				+    复用 trendradar.core.analyzer.calculate_news_weight 实现，
			
 
				+    权重配置从 config.yaml 的 advanced.weight 读取。
			
 
				 
			
 
				     Args:
			
 
				         news_data: 新闻数据字典，包含 ranks 和 count 字段
			
@@ -37,41 +72,7 @@ def calculate_news_weight(news_data: Dict, rank_threshold: int = 5) -> float:
 
				     Returns:
			
 
				         权重分数（0-100之间的浮点数）
			
 
				     """
			
 
				-    ranks = news_data.get("ranks", [])
			
 
				-    if not ranks:
			
 
				-        return 0.0
			
 
				-
			
 
				-    count = news_data.get("count", len(ranks))
			
 
				-
			
 
				-    # 权重配置（与 config.yaml 保持一致）
			
 
				-    RANK_WEIGHT = 0.6
			
 
				-    FREQUENCY_WEIGHT = 0.3
			
 
				-    HOTNESS_WEIGHT = 0.1
			
 
				-
			
 
				-    # 1. 排名权重：Σ(11 - min(rank, 10)) / 出现次数
			
 
				-    rank_scores = []
			
 
				-    for rank in ranks:
			
 
				-        score = 11 - min(rank, 10)
			
 
				-        rank_scores.append(score)
			
 
				-
			
 
				-    rank_weight = sum(rank_scores) / len(ranks) if ranks else 0
			
 
				-
			
 
				-    # 2. 频次权重：min(出现次数, 10) × 10
			
 
				-    frequency_weight = min(count, 10) * 10
			
 
				-
			
 
				-    # 3. 热度加成：高排名次数 / 总出现次数 × 100
			
 
				-    high_rank_count = sum(1 for rank in ranks if rank <= rank_threshold)
			
 
				-    hotness_ratio = high_rank_count / len(ranks) if ranks else 0
			
 
				-    hotness_weight = hotness_ratio * 100
			
 
				-
			
 
				-    # 综合权重
			
 
				-    total_weight = (
			
 
				-        rank_weight * RANK_WEIGHT
			
 
				-        + frequency_weight * FREQUENCY_WEIGHT
			
 
				-        + hotness_weight * HOTNESS_WEIGHT
			
 
				-    )
			
 
				-
			
 
				-    return total_weight
			
 
				+    return _calculate_news_weight(news_data, rank_threshold, _get_weight_config())
			
 
				 
			
 
				 
			
 
				 class AnalyticsTools:
			
@@ -2158,6 +2159,8 @@ class AnalyticsTools:
 
				         """
			
 
				         对新闻列表进行相似度聚合
			
 
				 
			
 
				+        使用双层过滤策略：先用 Jaccard 快速粗筛，再用 SequenceMatcher 精确计算
			
 
				+
			
 
				         Args:
			
 
				             news_list: 新闻列表
			
 
				             threshold: 相似度阈值
			
@@ -2169,17 +2172,31 @@ class AnalyticsTools:
 
				         if not news_list:
			
 
				             return []
			
 
				 
			
 
				-        # 按权重排序，优先保留高权重新闻作为代表
			
 
				-        sorted_news = sorted(news_list, key=lambda x: x.get("weight", 0), reverse=True)
			
 
				+        # 预计算字符集合用于快速过滤
			
 
				+        prepared_news = []
			
 
				+        for news in news_list:
			
 
				+            char_set = set(news["title"])
			
 
				+            prepared_news.append({
			
 
				+                "data": news,
			
 
				+                "char_set": char_set,
			
 
				+                "set_len": len(char_set)
			
 
				+            })
			
 
				+
			
 
				+        # 按权重排序
			
 
				+        sorted_items = sorted(prepared_news, key=lambda x: x["data"].get("weight", 0), reverse=True)
			
 
				 
			
 
				         aggregated = []
			
 
				         used_indices = set()
			
 
				+        PRE_FILTER_RATIO = 0.5  # 粗筛阈值系数
			
 
				 
			
 
				-        for i, news in enumerate(sorted_news):
			
 
				+        for i, item in enumerate(sorted_items):
			
 
				             if i in used_indices:
			
 
				                 continue
			
 
				 
			
 
				-            # 创建聚合组
			
 
				+            news = item["data"]
			
 
				+            base_set = item["char_set"]
			
 
				+            base_len = item["set_len"]
			
 
				+
			
 
				             group = {
			
 
				                 "representative_title": news["title"],
			
 
				                 "platforms": [news["platform_name"]],
			
@@ -2205,13 +2222,35 @@ class AnalyticsTools:
 
				             used_indices.add(i)
			
 
				 
			
 
				             # 查找相似新闻
			
 
				-            for j, other_news in enumerate(sorted_news):
			
 
				+            for j in range(i + 1, len(sorted_items)):
			
 
				                 if j in used_indices:
			
 
				                     continue
			
 
				 
			
 
				-                similarity = self._calculate_similarity(news["title"], other_news["title"])
			
 
				+                compare_item = sorted_items[j]
			
 
				+                compare_set = compare_item["char_set"]
			
 
				+                compare_len = compare_item["set_len"]
			
 
				+
			
 
				+                # 快速粗筛：长度检查
			
 
				+                if base_len == 0 or compare_len == 0:
			
 
				+                    continue
			
 
				+
			
 
				+                # 快速粗筛：长度比例检查
			
 
				+                if min(base_len, compare_len) / max(base_len, compare_len) < (threshold * PRE_FILTER_RATIO):
			
 
				+                    continue
			
 
				+
			
 
				+                # 快速粗筛：Jaccard 相似度
			
 
				+                intersection = len(base_set & compare_set)
			
 
				+                union = len(base_set | compare_set)
			
 
				+                jaccard_sim = intersection / union if union > 0 else 0
			
 
				+
			
 
				+                if jaccard_sim < (threshold * PRE_FILTER_RATIO):
			
 
				+                    continue
			
 
				+
			
 
				+                # 精确计算：SequenceMatcher
			
 
				+                other_news = compare_item["data"]
			
 
				+                real_similarity = self._calculate_similarity(news["title"], other_news["title"])
			
 
				 
			
 
				-                if similarity >= threshold:
			
 
				+                if real_similarity >= threshold:
			
 
				                     # 合并到当前组
			
 
				                     if other_news["platform_name"] not in group["platforms"]:
			
 
				                         group["platforms"].append(other_news["platform_name"])
			
--- a/mcp_server/tools/system.py
+++ b/mcp_server/tools/system.py
@@ -113,12 +113,18 @@ class SystemManagementTools:
 
				             with open(config_path, "r", encoding="utf-8") as f:
			
 
				                 config_data = yaml.safe_load(f)
			
 
				 
			
 
				-            # 获取平台配置
			
 
				-            all_platforms = config_data.get("platforms", [])
			
 
				+            # 获取平台配置（嵌套结构：{enabled: bool, sources: [...]})
			
 
				+            platforms_config = config_data.get("platforms", {})
			
 
				+            if not platforms_config.get("enabled", True):
			
 
				+                raise CrawlTaskError(
			
 
				+                    "热榜平台已禁用",
			
 
				+                    suggestion="请检查 config/config.yaml 中的 platforms.enabled 配置"
			
 
				+                )
			
 
				+            all_platforms = platforms_config.get("sources", [])
			
 
				             if not all_platforms:
			
 
				                 raise CrawlTaskError(
			
 
				                     "配置文件中没有平台配置",
			
 
				-                    suggestion="请检查 config/config.yaml 中的 platforms 配置"
			
 
				+                    suggestion="请检查 config/config.yaml 中的 platforms.sources 配置"
			
 
				                 )
			
 
				 
			
 
				             # 过滤平台
			
--- a/mcp_server/utils/validators.py
+++ b/mcp_server/utils/validators.py
@@ -167,8 +167,10 @@ def get_supported_platforms() -> List[str]:
 
				 
			
 
				         with open(config_path, 'r', encoding='utf-8') as f:
			
 
				             config = yaml.safe_load(f)
			
 
				-            platforms = config.get('platforms', [])
			
 
				-            return [p['id'] for p in platforms if 'id' in p]
			
 
				+            platforms_config = config.get('platforms', {})
			
 
				+            # 处理嵌套结构：{enabled: bool, sources: [...]}
			
 
				+            sources = platforms_config.get('sources', [])
			
 
				+            return [p['id'] for p in sources if 'id' in p]
			
 
				     except Exception as e:
			
 
				         # 降级方案：返回空列表，允许所有平台
			
 
				         print(f"警告：无法加载平台配置 ({config_path}): {e}")
			
--- a/version_mcp
+++ b/version_mcp
@@ -1 +1 @@
 
				-3.1.6
			
 
				+3.1.7
@@ -1 +1 @@
 				-3.1.6
 				+3.1.7