Procházet zdrojové kódy

fix(core): 修复排序权重量纲不对齐及清理死代码,升级至 v6.5.4

- rank_weight 值域 1~10 归一化到 10~100,与 frequency/hotness 对齐
- 修复 fallback 默认权重与 config.yaml 不一致(0.4/0.3/0.3 → 0.6/0.3/0.1)
- 清理死代码 TimeWindowChecker 和 get_url_signature
- 修复 CRON_SCHEDULE 校验正则在 Alpine grep 中报 Invalid range end
- 升级 setup-python v6 和 setup-uv v7 消除 Node.js 20 弃用警告
- bump version to v6.5.4
sansan před 1 měsícem
rodič
revize
655ef8d44c

+ 2 - 2
.github/workflows/crawler.yml

@@ -113,13 +113,13 @@ jobs:
 
       - name: Set up Python
         if: success()
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
           python-version: "3.12"
 
       - name: Install uv
         if: success()
-        uses: astral-sh/setup-uv@v5
+        uses: astral-sh/setup-uv@v7
 
       - name: Install dependencies
         if: success()

+ 1 - 1
README-EN.md

@@ -11,7 +11,7 @@ Deploy in <strong>30 seconds</strong> — Say goodbye to endless scrolling, only
 [![GitHub Stars](https://img.shields.io/github/stars/sansan0/TrendRadar?style=flat-square&logo=github&color=yellow)](https://github.com/sansan0/TrendRadar/stargazers)
 [![GitHub Forks](https://img.shields.io/github/forks/sansan0/TrendRadar?style=flat-square&logo=github&color=blue)](https://github.com/sansan0/TrendRadar/network/members)
 [![License](https://img.shields.io/badge/license-GPL--3.0-blue.svg?style=flat-square)](LICENSE)
-[![Version](https://img.shields.io/badge/version-v6.5.3-blue.svg)](https://github.com/sansan0/TrendRadar)
+[![Version](https://img.shields.io/badge/version-v6.5.4-blue.svg)](https://github.com/sansan0/TrendRadar)
 [![MCP](https://img.shields.io/badge/MCP-v4.0.1-green.svg)](https://github.com/sansan0/TrendRadar)
 [![RSS](https://img.shields.io/badge/RSS-Feed_Support-orange.svg?style=flat-square&logo=rss&logoColor=white)](https://github.com/sansan0/TrendRadar)
 [![AI Translation](https://img.shields.io/badge/AI-Multi--Language-purple.svg?style=flat-square)](https://github.com/sansan0/TrendRadar)

+ 1 - 1
README.md

@@ -12,7 +12,7 @@
 [![GitHub Stars](https://img.shields.io/github/stars/sansan0/TrendRadar?style=flat-square&logo=github&color=yellow)](https://github.com/sansan0/TrendRadar/stargazers)
 [![GitHub Forks](https://img.shields.io/github/forks/sansan0/TrendRadar?style=flat-square&logo=github&color=blue)](https://github.com/sansan0/TrendRadar/network/members)
 [![License](https://img.shields.io/badge/license-GPL--3.0-blue.svg?style=flat-square)](LICENSE)
-[![Version](https://img.shields.io/badge/version-v6.5.3-blue.svg)](https://github.com/sansan0/TrendRadar)
+[![Version](https://img.shields.io/badge/version-v6.5.4-blue.svg)](https://github.com/sansan0/TrendRadar)
 [![MCP](https://img.shields.io/badge/MCP-v4.0.1-green.svg)](https://github.com/sansan0/TrendRadar)
 [![RSS](https://img.shields.io/badge/RSS-订阅源支持-orange.svg?style=flat-square&logo=rss&logoColor=white)](https://github.com/sansan0/TrendRadar)
 [![AI翻译](https://img.shields.io/badge/AI-多语言推送-purple.svg?style=flat-square)](https://github.com/sansan0/TrendRadar)

+ 1 - 1
docker/entrypoint.sh

@@ -15,7 +15,7 @@ case "${RUN_MODE:-cron}" in
 "cron")
     # 校验 CRON_SCHEDULE 格式(仅允许 cron 表达式合法字符)
     CRON_EXPR="${CRON_SCHEDULE:-*/30 * * * *}"
-    if ! echo "$CRON_EXPR" | grep -qE '^[0-9*/,\-[:space:]]+$'; then
+    if ! echo "$CRON_EXPR" | grep -qE '^[0-9*/,[:space:]-]+$'; then
         echo "❌ CRON_SCHEDULE 格式非法: $CRON_EXPR"
         exit 1
     fi

+ 1 - 1
pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "trendradar"
-version = "6.5.3"
+version = "6.5.4"
 description = "TrendRadar - 热点新闻聚合与分析工具"
 requires-python = ">=3.12"
 dependencies = [

+ 1 - 1
trendradar/__init__.py

@@ -9,5 +9,5 @@ TrendRadar - 热点新闻聚合与分析工具
 
 from trendradar.context import AppContext
 
-__version__ = "6.5.3"
+__version__ = "6.5.4"
 __all__ = ["AppContext", "__version__"]

+ 4 - 3
trendradar/core/analyzer.py

@@ -44,7 +44,8 @@ def calculate_news_weight(
         if rank <= rank_threshold:
             high_rank_count += 1
 
-    rank_weight = rank_score_sum / len(ranks)
+    # 归一化到 10~100(与 frequency_weight、hotness_weight 量纲对齐)
+    rank_weight = (rank_score_sum / len(ranks)) * 10
 
     # 频次权重:min(出现次数, 10) × 10
     frequency_weight = min(count, 10) * 10
@@ -132,9 +133,9 @@ def count_word_frequency(
     # 默认权重配置
     if weight_config is None:
         weight_config = {
-            "RANK_WEIGHT": 0.4,
+            "RANK_WEIGHT": 0.6,
             "FREQUENCY_WEIGHT": 0.3,
-            "HOTNESS_WEIGHT": 0.3,
+            "HOTNESS_WEIGHT": 0.1,
         }
 
     # 默认时间转换函数

+ 1 - 2
trendradar/utils/__init__.py

@@ -10,7 +10,7 @@ from trendradar.utils.time import (
     get_current_time_display,
     convert_time_for_display,
 )
-from trendradar.utils.url import normalize_url, get_url_signature
+from trendradar.utils.url import normalize_url
 
 __all__ = [
     "get_configured_time",
@@ -19,5 +19,4 @@ __all__ = [
     "get_current_time_display",
     "convert_time_for_display",
     "normalize_url",
-    "get_url_signature",
 ]

+ 1 - 159
trendradar/utils/time.py

@@ -6,7 +6,7 @@
 """
 
 from datetime import datetime
-from typing import Optional, Tuple
+from typing import Optional
 
 import pytz
 
@@ -285,161 +285,3 @@ def calculate_days_old(iso_time: str, timezone: str = DEFAULT_TIMEZONE) -> Optio
     except Exception:
         return None
 
-
-class TimeWindowChecker:
-    """
-    时间窗口检查器
-
-    统一管理时间窗口控制逻辑,支持:
-    - 推送窗口控制 (push_window)
-    - AI 分析窗口控制 (analysis_window)
-    - once_per_day 功能
-    """
-
-    def __init__(
-        self,
-        storage_backend,
-        get_time_func=None,
-        window_name: str = "时间窗口",
-    ):
-        """
-        初始化时间窗口检查器
-
-        Args:
-            storage_backend: 存储后端实例
-            get_time_func: 获取当前时间的函数
-            window_name: 窗口名称(用于日志输出)
-        """
-        self.storage_backend = storage_backend
-        self.get_time_func = get_time_func or (lambda: get_configured_time(DEFAULT_TIMEZONE))
-        self.window_name = window_name
-
-    def is_in_time_range(self, start_time: str, end_time: str) -> bool:
-        """
-        检查当前时间是否在指定时间范围内
-
-        支持跨日时间窗口,例如:
-        - 正常窗口:09:00-21:00(当天 9 点到 21 点)
-        - 跨日窗口:22:00-02:00(当天 22 点到次日 2 点)
-
-        Args:
-            start_time: 开始时间(格式:HH:MM)
-            end_time: 结束时间(格式:HH:MM)
-
-        Returns:
-            是否在时间范围内
-        """
-        now = self.get_time_func()
-        current_time = now.strftime("%H:%M")
-
-        normalized_start = self._normalize_time(start_time)
-        normalized_end = self._normalize_time(end_time)
-        normalized_current = self._normalize_time(current_time)
-
-        # 判断是否跨日窗口(start > end 表示跨日,如 22:00-02:00)
-        if normalized_start <= normalized_end:
-            # 正常窗口:09:00-21:00
-            result = normalized_start <= normalized_current <= normalized_end
-        else:
-            # 跨日窗口:22:00-02:00
-            # 当前时间 >= 开始时间(如 23:00 >= 22:00)或 当前时间 <= 结束时间(如 01:00 <= 02:00)
-            result = normalized_current >= normalized_start or normalized_current <= normalized_end
-
-        if not result:
-            print(f"[{self.window_name}] 当前 {normalized_current},窗口 {normalized_start}-{normalized_end}")
-
-        return result
-
-    def _normalize_time(self, time_str: str) -> str:
-        """将时间字符串标准化为 HH:MM 格式"""
-        try:
-            parts = time_str.strip().split(":")
-            if len(parts) != 2:
-                raise ValueError(f"时间格式错误: {time_str}")
-
-            hour = int(parts[0])
-            minute = int(parts[1])
-
-            if not (0 <= hour <= 23 and 0 <= minute <= 59):
-                raise ValueError(f"时间范围错误: {time_str}")
-
-            return f"{hour:02d}:{minute:02d}"
-        except Exception as e:
-            print(f"[{self.window_name}] 时间格式化错误 '{time_str}': {e}")
-            return time_str
-
-    def check_window(
-        self,
-        window_config: dict,
-        check_once_per_day_func=None,
-        record_func=None,
-    ) -> Tuple[bool, str]:
-        """
-        统一的时间窗口检查逻辑
-
-        Args:
-            window_config: 窗口配置字典,包含:
-                - ENABLED: 是否启用窗口控制
-                - TIME_RANGE: {"START": "HH:MM", "END": "HH:MM"}
-                - ONCE_PER_DAY: 是否每天只执行一次
-            check_once_per_day_func: 检查今天是否已执行的函数
-            record_func: 记录执行的函数(成功后调用)
-
-        Returns:
-            (should_proceed, reason) 元组:
-            - should_proceed: 是否应该继续执行
-            - reason: 原因说明
-        """
-        if not window_config.get("ENABLED", False):
-            return True, "窗口控制未启用"
-
-        time_range = window_config.get("TIME_RANGE", {})
-        start_time = time_range.get("START", "00:00")
-        end_time = time_range.get("END", "23:59")
-
-        # 检查时间范围
-        if not self.is_in_time_range(start_time, end_time):
-            now = self.get_time_func()
-            return False, f"当前时间 {now.strftime('%H:%M')} 不在窗口 {start_time}-{end_time} 内"
-
-        # 检查 once_per_day
-        if window_config.get("ONCE_PER_DAY", False) and check_once_per_day_func:
-            if check_once_per_day_func():
-                return False, "今天已执行过"
-            else:
-                print(f"[{self.window_name}] 今天首次执行")
-
-        return True, "在窗口内"
-
-    def get_status(self, window_config: dict, check_once_per_day_func=None) -> dict:
-        """
-        获取窗口状态信息
-
-        Args:
-            window_config: 窗口配置
-            check_once_per_day_func: 检查今天是否已执行的函数
-
-        Returns:
-            状态信息字典
-        """
-        now = self.get_time_func()
-        status = {
-            "enabled": window_config.get("ENABLED", False),
-            "current_time": now.strftime("%H:%M:%S"),
-            "current_date": now.strftime("%Y-%m-%d"),
-            "timezone": str(now.tzinfo),
-        }
-
-        if status["enabled"]:
-            time_range = window_config.get("TIME_RANGE", {})
-            status["window_start"] = time_range.get("START", "00:00")
-            status["window_end"] = time_range.get("END", "23:59")
-            status["in_window"] = self.is_in_time_range(
-                status["window_start"], status["window_end"]
-            )
-            status["once_per_day"] = window_config.get("ONCE_PER_DAY", False)
-
-            if status["once_per_day"] and check_once_per_day_func:
-                status["executed_today"] = check_once_per_day_func()
-
-        return status

+ 0 - 18
trendradar/utils/url.py

@@ -126,21 +126,3 @@ def normalize_url(url: str, platform_id: str = "") -> str:
     except Exception:
         # 解析失败时返回原始 URL
         return url
-
-
-def get_url_signature(url: str, platform_id: str = "") -> str:
-    """
-    获取 URL 的签名(用于快速比较)
-
-    基于标准化 URL 生成签名,可用于:
-    - 快速判断两个 URL 是否指向同一内容
-    - 作为缓存键
-
-    Args:
-        url: 原始 URL
-        platform_id: 平台 ID
-
-    Returns:
-        URL 签名字符串
-    """
-    return normalize_url(url, platform_id)

+ 1 - 1
uv.lock

@@ -1996,7 +1996,7 @@ wheels = [
 
 [[package]]
 name = "trendradar"
-version = "6.5.3"
+version = "6.5.4"
 source = { editable = "." }
 dependencies = [
     { name = "boto3" },

+ 1 - 1
version

@@ -1 +1 @@
-6.5.3
+6.5.4