|
|
@@ -157,11 +157,10 @@ class AnalyticsTools:
|
|
|
self,
|
|
|
topic: str,
|
|
|
analysis_type: str = "trend",
|
|
|
- time_range: str = "7d",
|
|
|
+ date_range: Optional[Dict[str, str]] = None,
|
|
|
granularity: str = "day",
|
|
|
threshold: float = 3.0,
|
|
|
time_window: int = 24,
|
|
|
- lookback_days: int = 7,
|
|
|
lookahead_hours: int = 6,
|
|
|
confidence_threshold: float = 0.7
|
|
|
) -> Dict:
|
|
|
@@ -175,11 +174,12 @@ class AnalyticsTools:
|
|
|
- "lifecycle": 生命周期分析(从出现到消失的完整周期)
|
|
|
- "viral": 异常热度检测(识别突然爆火的话题)
|
|
|
- "predict": 话题预测(预测未来可能的热点)
|
|
|
- time_range: 时间范围(trend模式),默认"7d"(7d/24h/1w/1m/2m)
|
|
|
+ date_range: 日期范围(trend和lifecycle模式),可选
|
|
|
+ - **格式**: {"start": "YYYY-MM-DD", "end": "YYYY-MM-DD"}
|
|
|
+ - **默认**: 不指定时默认分析最近7天
|
|
|
granularity: 时间粒度(trend模式),默认"day"(hour/day)
|
|
|
threshold: 热度突增倍数阈值(viral模式),默认3.0
|
|
|
time_window: 检测时间窗口小时数(viral模式),默认24
|
|
|
- lookback_days: 回溯天数(lifecycle模式),默认7
|
|
|
lookahead_hours: 预测未来小时数(predict模式),默认6
|
|
|
confidence_threshold: 置信度阈值(predict模式),默认0.7
|
|
|
|
|
|
@@ -187,8 +187,8 @@ class AnalyticsTools:
|
|
|
趋势分析结果字典
|
|
|
|
|
|
Examples:
|
|
|
- - analyze_topic_trend_unified(topic="人工智能", analysis_type="trend", time_range="7d")
|
|
|
- - analyze_topic_trend_unified(topic="特斯拉", analysis_type="lifecycle", lookback_days=7)
|
|
|
+ - analyze_topic_trend_unified(topic="人工智能", analysis_type="trend", date_range={"start": "2025-10-18", "end": "2025-10-25"})
|
|
|
+ - analyze_topic_trend_unified(topic="特斯拉", analysis_type="lifecycle", date_range={"start": "2025-10-18", "end": "2025-10-25"})
|
|
|
- analyze_topic_trend_unified(topic="比特币", analysis_type="viral", threshold=3.0)
|
|
|
- analyze_topic_trend_unified(topic="ChatGPT", analysis_type="predict", lookahead_hours=6)
|
|
|
"""
|
|
|
@@ -206,13 +206,13 @@ class AnalyticsTools:
|
|
|
if analysis_type == "trend":
|
|
|
return self.get_topic_trend_analysis(
|
|
|
topic=topic,
|
|
|
- time_range=time_range,
|
|
|
+ date_range=date_range,
|
|
|
granularity=granularity
|
|
|
)
|
|
|
elif analysis_type == "lifecycle":
|
|
|
return self.analyze_topic_lifecycle(
|
|
|
topic=topic,
|
|
|
- lookback_days=lookback_days
|
|
|
+ date_range=date_range
|
|
|
)
|
|
|
elif analysis_type == "viral":
|
|
|
# viral模式不需要topic参数,使用通用检测
|
|
|
@@ -244,7 +244,7 @@ class AnalyticsTools:
|
|
|
def get_topic_trend_analysis(
|
|
|
self,
|
|
|
topic: str,
|
|
|
- time_range: str = "7d",
|
|
|
+ date_range: Optional[Dict[str, str]] = None,
|
|
|
granularity: str = "day"
|
|
|
) -> Dict:
|
|
|
"""
|
|
|
@@ -252,7 +252,9 @@ class AnalyticsTools:
|
|
|
|
|
|
Args:
|
|
|
topic: 话题关键词
|
|
|
- time_range: 时间范围,格式:7d(7天)、24h(24小时)、1w(1周)、1m(1个月)、2m(2个月)
|
|
|
+ date_range: 日期范围(可选)
|
|
|
+ - **格式**: {"start": "YYYY-MM-DD", "end": "YYYY-MM-DD"}
|
|
|
+ - **默认**: 不指定时默认分析最近7天
|
|
|
granularity: 时间粒度,仅支持 day(天)
|
|
|
|
|
|
Returns:
|
|
|
@@ -264,20 +266,20 @@ class AnalyticsTools:
|
|
|
- "查看'比特币'过去一周的热度变化"
|
|
|
- "看看'iPhone'最近7天的趋势如何"
|
|
|
- "分析'特斯拉'最近一个月的热度趋势"
|
|
|
- - "查看'ChatGPT'过去2个月的趋势变化"
|
|
|
+ - "查看'ChatGPT'2024年12月的趋势变化"
|
|
|
|
|
|
代码调用示例:
|
|
|
>>> tools = AnalyticsTools()
|
|
|
>>> # 分析7天趋势
|
|
|
>>> result = tools.get_topic_trend_analysis(
|
|
|
... topic="人工智能",
|
|
|
- ... time_range="7d",
|
|
|
+ ... date_range={"start": "2025-10-18", "end": "2025-10-25"},
|
|
|
... granularity="day"
|
|
|
... )
|
|
|
- >>> # 分析1个月趋势
|
|
|
+ >>> # 分析历史月份趋势
|
|
|
>>> result = tools.get_topic_trend_analysis(
|
|
|
... topic="特斯拉",
|
|
|
- ... time_range="1m",
|
|
|
+ ... date_range={"start": "2024-12-01", "end": "2024-12-31"},
|
|
|
... granularity="day"
|
|
|
... )
|
|
|
>>> print(result['trend_data'])
|
|
|
@@ -294,15 +296,21 @@ class AnalyticsTools:
|
|
|
suggestion="当前仅支持 'day' 粒度,因为底层数据按天聚合"
|
|
|
)
|
|
|
|
|
|
- # 解析时间范围
|
|
|
- days = self._parse_time_range(time_range)
|
|
|
+ # 处理日期范围(不指定时默认最近7天)
|
|
|
+ if date_range:
|
|
|
+ from ..utils.validators import validate_date_range
|
|
|
+ date_range_tuple = validate_date_range(date_range)
|
|
|
+ start_date, end_date = date_range_tuple
|
|
|
+ else:
|
|
|
+ # 默认最近7天
|
|
|
+ end_date = datetime.now()
|
|
|
+ start_date = end_date - timedelta(days=6)
|
|
|
|
|
|
# 收集趋势数据
|
|
|
trend_data = []
|
|
|
- start_date = datetime.now() - timedelta(days=days)
|
|
|
current_date = start_date
|
|
|
|
|
|
- while current_date <= datetime.now():
|
|
|
+ while current_date <= end_date:
|
|
|
try:
|
|
|
all_titles, _, _ = self.data_service.parser.read_all_titles_for_date(
|
|
|
date=current_date
|
|
|
@@ -336,6 +344,7 @@ class AnalyticsTools:
|
|
|
|
|
|
# 计算趋势指标
|
|
|
counts = [item["count"] for item in trend_data]
|
|
|
+ total_days = (end_date - start_date).days + 1
|
|
|
|
|
|
if len(counts) >= 2:
|
|
|
# 计算涨跌幅度
|
|
|
@@ -359,7 +368,11 @@ class AnalyticsTools:
|
|
|
return {
|
|
|
"success": True,
|
|
|
"topic": topic,
|
|
|
- "time_range": time_range,
|
|
|
+ "date_range": {
|
|
|
+ "start": start_date.strftime("%Y-%m-%d"),
|
|
|
+ "end": end_date.strftime("%Y-%m-%d"),
|
|
|
+ "total_days": total_days
|
|
|
+ },
|
|
|
"granularity": granularity,
|
|
|
"trend_data": trend_data,
|
|
|
"statistics": {
|
|
|
@@ -1452,14 +1465,16 @@ class AnalyticsTools:
|
|
|
def analyze_topic_lifecycle(
|
|
|
self,
|
|
|
topic: str,
|
|
|
- lookback_days: int = 7
|
|
|
+ date_range: Optional[Dict[str, str]] = None
|
|
|
) -> Dict:
|
|
|
"""
|
|
|
话题生命周期分析 - 追踪话题从出现到消失的完整周期
|
|
|
|
|
|
Args:
|
|
|
topic: 话题关键词
|
|
|
- lookback_days: 回溯天数
|
|
|
+ date_range: 日期范围(可选)
|
|
|
+ - **格式**: {"start": "YYYY-MM-DD", "end": "YYYY-MM-DD"}
|
|
|
+ - **默认**: 不指定时默认分析最近7天
|
|
|
|
|
|
Returns:
|
|
|
话题生命周期分析结果
|
|
|
@@ -1474,21 +1489,28 @@ class AnalyticsTools:
|
|
|
>>> tools = AnalyticsTools()
|
|
|
>>> result = tools.analyze_topic_lifecycle(
|
|
|
... topic="人工智能",
|
|
|
- ... lookback_days=7
|
|
|
+ ... date_range={"start": "2025-10-18", "end": "2025-10-25"}
|
|
|
... )
|
|
|
>>> print(result['lifecycle_stage'])
|
|
|
"""
|
|
|
try:
|
|
|
# 参数验证
|
|
|
topic = validate_keyword(topic)
|
|
|
- lookback_days = validate_limit(lookback_days, default=7, max_limit=30)
|
|
|
+
|
|
|
+ # 处理日期范围(不指定时默认最近7天)
|
|
|
+ if date_range:
|
|
|
+ from ..utils.validators import validate_date_range
|
|
|
+ date_range_tuple = validate_date_range(date_range)
|
|
|
+ start_date, end_date = date_range_tuple
|
|
|
+ else:
|
|
|
+ # 默认最近7天
|
|
|
+ end_date = datetime.now()
|
|
|
+ start_date = end_date - timedelta(days=6)
|
|
|
|
|
|
# 收集话题历史数据
|
|
|
lifecycle_data = []
|
|
|
- start_date = datetime.now() - timedelta(days=lookback_days)
|
|
|
-
|
|
|
current_date = start_date
|
|
|
- while current_date <= datetime.now():
|
|
|
+ while current_date <= end_date:
|
|
|
try:
|
|
|
all_titles, _, _ = self.data_service.parser.read_all_titles_for_date(
|
|
|
date=current_date
|
|
|
@@ -1514,12 +1536,16 @@ class AnalyticsTools:
|
|
|
|
|
|
current_date += timedelta(days=1)
|
|
|
|
|
|
+ # 计算分析天数
|
|
|
+ total_days = (end_date - start_date).days + 1
|
|
|
+
|
|
|
# 分析生命周期阶段
|
|
|
counts = [item["count"] for item in lifecycle_data]
|
|
|
|
|
|
if not any(counts):
|
|
|
+ time_desc = f"{start_date.strftime('%Y-%m-%d')} 至 {end_date.strftime('%Y-%m-%d')}"
|
|
|
raise DataNotFoundError(
|
|
|
- f"在过去 {lookback_days} 天内未找到话题 '{topic}'",
|
|
|
+ f"在 {time_desc} 内未找到话题 '{topic}'",
|
|
|
suggestion="请尝试其他话题或扩大时间范围"
|
|
|
)
|
|
|
|
|
|
@@ -1554,7 +1580,7 @@ class AnalyticsTools:
|
|
|
|
|
|
if active_days <= 2 and max_count > avg_count * 2:
|
|
|
topic_type = "昙花一现"
|
|
|
- elif active_days >= lookback_days * 0.6:
|
|
|
+ elif active_days >= total_days * 0.6:
|
|
|
topic_type = "持续热点"
|
|
|
else:
|
|
|
topic_type = "周期性热点"
|
|
|
@@ -1562,7 +1588,11 @@ class AnalyticsTools:
|
|
|
return {
|
|
|
"success": True,
|
|
|
"topic": topic,
|
|
|
- "lookback_days": lookback_days,
|
|
|
+ "date_range": {
|
|
|
+ "start": start_date.strftime("%Y-%m-%d"),
|
|
|
+ "end": end_date.strftime("%Y-%m-%d"),
|
|
|
+ "total_days": total_days
|
|
|
+ },
|
|
|
"lifecycle_data": lifecycle_data,
|
|
|
"analysis": {
|
|
|
"first_appearance": first_appearance,
|
|
|
@@ -1890,29 +1920,6 @@ class AnalyticsTools:
|
|
|
|
|
|
# ==================== 辅助方法 ====================
|
|
|
|
|
|
- def _parse_time_range(self, time_range: str) -> int:
|
|
|
- """解析时间范围字符串为天数"""
|
|
|
- match = re.match(r'(\d+)([dhwm])', time_range.lower())
|
|
|
- if not match:
|
|
|
- raise InvalidParameterError(
|
|
|
- f"无效的时间范围格式: {time_range}",
|
|
|
- suggestion="格式示例:7d(7天)、24h(24小时)、1w(1周)、1m(1个月)、2m(2个月)"
|
|
|
- )
|
|
|
-
|
|
|
- value = int(match.group(1))
|
|
|
- unit = match.group(2)
|
|
|
-
|
|
|
- if unit == 'h':
|
|
|
- return max(1, value // 24) # 转换为天数
|
|
|
- elif unit == 'd':
|
|
|
- return value
|
|
|
- elif unit == 'w':
|
|
|
- return value * 7
|
|
|
- elif unit == 'm':
|
|
|
- return value * 30 # 1个月按30天计算
|
|
|
-
|
|
|
- return value
|
|
|
-
|
|
|
def _extract_keywords(self, title: str, min_length: int = 2) -> List[str]:
|
|
|
"""
|
|
|
从标题中提取关键词(简单实现)
|