|
|
@@ -15,7 +15,7 @@ import requests
|
|
|
import yaml
|
|
|
|
|
|
|
|
|
-VERSION = "2.1.1"
|
|
|
+VERSION = "2.1.2"
|
|
|
|
|
|
|
|
|
# === 配置管理 ===
|
|
|
@@ -43,6 +43,7 @@ def load_config():
|
|
|
"ENABLE_CRAWLER": config_data["crawler"]["enable_crawler"],
|
|
|
"ENABLE_NOTIFICATION": config_data["notification"]["enable_notification"],
|
|
|
"MESSAGE_BATCH_SIZE": config_data["notification"]["message_batch_size"],
|
|
|
+ "DINGTALK_BATCH_SIZE": config_data["notification"].get("dingtalk_batch_size", 20000),
|
|
|
"BATCH_SEND_INTERVAL": config_data["notification"]["batch_send_interval"],
|
|
|
"FEISHU_MESSAGE_SEPARATOR": config_data["notification"][
|
|
|
"feishu_message_separator"
|
|
|
@@ -2273,10 +2274,16 @@ def split_content_into_batches(
|
|
|
report_data: Dict,
|
|
|
format_type: str,
|
|
|
update_info: Optional[Dict] = None,
|
|
|
- max_bytes: int = CONFIG["MESSAGE_BATCH_SIZE"],
|
|
|
+ max_bytes: int = None,
|
|
|
mode: str = "daily",
|
|
|
) -> List[str]:
|
|
|
"""分批处理消息内容,确保词组标题+至少第一条新闻的完整性"""
|
|
|
+ if max_bytes is None:
|
|
|
+ if format_type == "dingtalk":
|
|
|
+ max_bytes = CONFIG.get("DINGTALK_BATCH_SIZE", 20000)
|
|
|
+ else:
|
|
|
+ max_bytes = CONFIG.get("MESSAGE_BATCH_SIZE", 4000)
|
|
|
+
|
|
|
batches = []
|
|
|
|
|
|
total_titles = sum(
|
|
|
@@ -2289,6 +2296,11 @@ def split_content_into_batches(
|
|
|
base_header = f"**总新闻数:** {total_titles}\n\n\n\n"
|
|
|
elif format_type == "telegram":
|
|
|
base_header = f"总新闻数: {total_titles}\n\n"
|
|
|
+ elif format_type == "dingtalk":
|
|
|
+ base_header = f"**总新闻数:** {total_titles}\n\n"
|
|
|
+ base_header += f"**时间:** {now.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
|
|
|
+ base_header += f"**类型:** 热点分析报告\n\n"
|
|
|
+ base_header += "---\n\n"
|
|
|
|
|
|
base_footer = ""
|
|
|
if format_type == "wework":
|
|
|
@@ -2299,6 +2311,10 @@ def split_content_into_batches(
|
|
|
base_footer = f"\n\n更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}"
|
|
|
if update_info:
|
|
|
base_footer += f"\nTrendRadar 发现新版本 {update_info['remote_version']},当前 {update_info['current_version']}"
|
|
|
+ elif format_type == "dingtalk":
|
|
|
+ base_footer = f"\n\n> 更新时间:{now.strftime('%Y-%m-%d %H:%M:%S')}"
|
|
|
+ if update_info:
|
|
|
+ base_footer += f"\n> TrendRadar 发现新版本 **{update_info['remote_version']}**,当前 **{update_info['current_version']}**"
|
|
|
|
|
|
stats_header = ""
|
|
|
if report_data["stats"]:
|
|
|
@@ -2306,6 +2322,8 @@ def split_content_into_batches(
|
|
|
stats_header = f"📊 **热点词汇统计**\n\n"
|
|
|
elif format_type == "telegram":
|
|
|
stats_header = f"📊 热点词汇统计\n\n"
|
|
|
+ elif format_type == "dingtalk":
|
|
|
+ stats_header = f"📊 **热点词汇统计**\n\n"
|
|
|
|
|
|
current_batch = base_header
|
|
|
current_batch_has_content = False
|
|
|
@@ -2370,6 +2388,17 @@ def split_content_into_batches(
|
|
|
word_header = f"📈 {sequence_display} {word} : {count} 条\n\n"
|
|
|
else:
|
|
|
word_header = f"📌 {sequence_display} {word} : {count} 条\n\n"
|
|
|
+ elif format_type == "dingtalk":
|
|
|
+ if count >= 10:
|
|
|
+ word_header = (
|
|
|
+ f"🔥 {sequence_display} **{word}** : **{count}** 条\n\n"
|
|
|
+ )
|
|
|
+ elif count >= 5:
|
|
|
+ word_header = (
|
|
|
+ f"📈 {sequence_display} **{word}** : **{count}** 条\n\n"
|
|
|
+ )
|
|
|
+ else:
|
|
|
+ word_header = f"📌 {sequence_display} **{word}** : {count} 条\n\n"
|
|
|
|
|
|
# 构建第一条新闻
|
|
|
first_news_line = ""
|
|
|
@@ -2383,6 +2412,10 @@ def split_content_into_batches(
|
|
|
formatted_title = format_title_for_platform(
|
|
|
"telegram", first_title_data, show_source=True
|
|
|
)
|
|
|
+ elif format_type == "dingtalk":
|
|
|
+ formatted_title = format_title_for_platform(
|
|
|
+ "dingtalk", first_title_data, show_source=True
|
|
|
+ )
|
|
|
else:
|
|
|
formatted_title = f"{first_title_data['title']}"
|
|
|
|
|
|
@@ -2420,6 +2453,10 @@ def split_content_into_batches(
|
|
|
formatted_title = format_title_for_platform(
|
|
|
"telegram", title_data, show_source=True
|
|
|
)
|
|
|
+ elif format_type == "dingtalk":
|
|
|
+ formatted_title = format_title_for_platform(
|
|
|
+ "dingtalk", title_data, show_source=True
|
|
|
+ )
|
|
|
else:
|
|
|
formatted_title = f"{title_data['title']}"
|
|
|
|
|
|
@@ -2447,6 +2484,8 @@ def split_content_into_batches(
|
|
|
separator = f"\n\n\n\n"
|
|
|
elif format_type == "telegram":
|
|
|
separator = f"\n\n"
|
|
|
+ elif format_type == "dingtalk":
|
|
|
+ separator = f"\n---\n\n"
|
|
|
|
|
|
test_content = current_batch + separator
|
|
|
if (
|
|
|
@@ -2464,6 +2503,8 @@ def split_content_into_batches(
|
|
|
new_header = (
|
|
|
f"\n\n🆕 本次新增热点新闻 (共 {report_data['total_new_count']} 条)\n\n"
|
|
|
)
|
|
|
+ elif format_type == "dingtalk":
|
|
|
+ new_header = f"\n---\n\n🆕 **本次新增热点新闻** (共 {report_data['total_new_count']} 条)\n\n"
|
|
|
|
|
|
test_content = current_batch + new_header
|
|
|
if (
|
|
|
@@ -2485,6 +2526,8 @@ def split_content_into_batches(
|
|
|
source_header = f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n"
|
|
|
elif format_type == "telegram":
|
|
|
source_header = f"{source_data['source_name']} ({len(source_data['titles'])} 条):\n\n"
|
|
|
+ elif format_type == "dingtalk":
|
|
|
+ source_header = f"**{source_data['source_name']}** ({len(source_data['titles'])} 条):\n\n"
|
|
|
|
|
|
# 构建第一条新增新闻
|
|
|
first_news_line = ""
|
|
|
@@ -2501,6 +2544,10 @@ def split_content_into_batches(
|
|
|
formatted_title = format_title_for_platform(
|
|
|
"telegram", title_data_copy, show_source=False
|
|
|
)
|
|
|
+ elif format_type == "dingtalk":
|
|
|
+ formatted_title = format_title_for_platform(
|
|
|
+ "dingtalk", title_data_copy, show_source=False
|
|
|
+ )
|
|
|
else:
|
|
|
formatted_title = f"{title_data_copy['title']}"
|
|
|
|
|
|
@@ -2538,6 +2585,10 @@ def split_content_into_batches(
|
|
|
formatted_title = format_title_for_platform(
|
|
|
"telegram", title_data_copy, show_source=False
|
|
|
)
|
|
|
+ elif format_type == "dingtalk":
|
|
|
+ formatted_title = format_title_for_platform(
|
|
|
+ "dingtalk", title_data_copy, show_source=False
|
|
|
+ )
|
|
|
else:
|
|
|
formatted_title = f"{title_data_copy['title']}"
|
|
|
|
|
|
@@ -2564,6 +2615,8 @@ def split_content_into_batches(
|
|
|
failed_header = f"\n\n\n\n⚠️ **数据获取失败的平台:**\n\n"
|
|
|
elif format_type == "telegram":
|
|
|
failed_header = f"\n\n⚠️ 数据获取失败的平台:\n\n"
|
|
|
+ elif format_type == "dingtalk":
|
|
|
+ failed_header = f"\n---\n\n⚠️ **数据获取失败的平台:**\n\n"
|
|
|
|
|
|
test_content = current_batch + failed_header
|
|
|
if (
|
|
|
@@ -2579,7 +2632,11 @@ def split_content_into_batches(
|
|
|
current_batch_has_content = True
|
|
|
|
|
|
for i, id_value in enumerate(report_data["failed_ids"], 1):
|
|
|
- failed_line = f" • {id_value}\n"
|
|
|
+ if format_type == "dingtalk":
|
|
|
+ failed_line = f" • **{id_value}**\n"
|
|
|
+ else:
|
|
|
+ failed_line = f" • {id_value}\n"
|
|
|
+
|
|
|
test_content = current_batch + failed_line
|
|
|
if (
|
|
|
len(test_content.encode("utf-8")) + len(base_footer.encode("utf-8"))
|
|
|
@@ -2735,41 +2792,78 @@ def send_to_dingtalk(
|
|
|
proxy_url: Optional[str] = None,
|
|
|
mode: str = "daily",
|
|
|
) -> bool:
|
|
|
- """发送到钉钉"""
|
|
|
+ """发送到钉钉(支持分批发送)"""
|
|
|
headers = {"Content-Type": "application/json"}
|
|
|
-
|
|
|
- text_content = render_dingtalk_content(report_data, update_info, mode)
|
|
|
-
|
|
|
- payload = {
|
|
|
- "msgtype": "markdown",
|
|
|
- "markdown": {
|
|
|
- "title": f"TrendRadar 热点分析报告 - {report_type}",
|
|
|
- "text": text_content,
|
|
|
- },
|
|
|
- }
|
|
|
-
|
|
|
proxies = None
|
|
|
if proxy_url:
|
|
|
proxies = {"http": proxy_url, "https": proxy_url}
|
|
|
|
|
|
- try:
|
|
|
- response = requests.post(
|
|
|
- webhook_url, headers=headers, json=payload, proxies=proxies, timeout=30
|
|
|
+ # 获取分批内容,使用钉钉专用的批次大小
|
|
|
+ batches = split_content_into_batches(
|
|
|
+ report_data,
|
|
|
+ "dingtalk",
|
|
|
+ update_info,
|
|
|
+ max_bytes=CONFIG.get("DINGTALK_BATCH_SIZE", 20000),
|
|
|
+ mode=mode
|
|
|
+ )
|
|
|
+
|
|
|
+ print(f"钉钉消息分为 {len(batches)} 批次发送 [{report_type}]")
|
|
|
+
|
|
|
+ # 逐批发送
|
|
|
+ for i, batch_content in enumerate(batches, 1):
|
|
|
+ batch_size = len(batch_content.encode("utf-8"))
|
|
|
+ print(
|
|
|
+ f"发送钉钉第 {i}/{len(batches)} 批次,大小:{batch_size} 字节 [{report_type}]"
|
|
|
)
|
|
|
- if response.status_code == 200:
|
|
|
- result = response.json()
|
|
|
- if result.get("errcode") == 0:
|
|
|
- print(f"钉钉通知发送成功 [{report_type}]")
|
|
|
- return True
|
|
|
+
|
|
|
+ # 添加批次标识
|
|
|
+ if len(batches) > 1:
|
|
|
+ batch_header = f"**[第 {i}/{len(batches)} 批次]**\n\n"
|
|
|
+ # 将批次标识插入到适当位置(在标题之后)
|
|
|
+ if "📊 **热点词汇统计**" in batch_content:
|
|
|
+ batch_content = batch_content.replace(
|
|
|
+ "📊 **热点词汇统计**\n\n",
|
|
|
+ f"📊 **热点词汇统计** {batch_header}\n\n"
|
|
|
+ )
|
|
|
+ else:
|
|
|
+ # 如果没有统计标题,直接在开头添加
|
|
|
+ batch_content = batch_header + batch_content
|
|
|
+
|
|
|
+ payload = {
|
|
|
+ "msgtype": "markdown",
|
|
|
+ "markdown": {
|
|
|
+ "title": f"TrendRadar 热点分析报告 - {report_type}",
|
|
|
+ "text": batch_content,
|
|
|
+ },
|
|
|
+ }
|
|
|
+
|
|
|
+ try:
|
|
|
+ response = requests.post(
|
|
|
+ webhook_url, headers=headers, json=payload, proxies=proxies, timeout=30
|
|
|
+ )
|
|
|
+ if response.status_code == 200:
|
|
|
+ result = response.json()
|
|
|
+ if result.get("errcode") == 0:
|
|
|
+ print(f"钉钉第 {i}/{len(batches)} 批次发送成功 [{report_type}]")
|
|
|
+ # 批次间间隔
|
|
|
+ if i < len(batches):
|
|
|
+ time.sleep(CONFIG["BATCH_SEND_INTERVAL"])
|
|
|
+ else:
|
|
|
+ print(
|
|
|
+ f"钉钉第 {i}/{len(batches)} 批次发送失败 [{report_type}],错误:{result.get('errmsg')}"
|
|
|
+ )
|
|
|
+ return False
|
|
|
else:
|
|
|
- print(f"钉钉通知发送失败 [{report_type}],错误:{result.get('errmsg')}")
|
|
|
+ print(
|
|
|
+ f"钉钉第 {i}/{len(batches)} 批次发送失败 [{report_type}],状态码:{response.status_code}"
|
|
|
+ )
|
|
|
return False
|
|
|
- else:
|
|
|
- print(f"钉钉通知发送失败 [{report_type}],状态码:{response.status_code}")
|
|
|
+ except Exception as e:
|
|
|
+ print(f"钉钉第 {i}/{len(batches)} 批次发送出错 [{report_type}]:{e}")
|
|
|
return False
|
|
|
- except Exception as e:
|
|
|
- print(f"钉钉通知发送出错 [{report_type}]:{e}")
|
|
|
- return False
|
|
|
+
|
|
|
+ print(f"钉钉所有 {len(batches)} 批次发送完成 [{report_type}]")
|
|
|
+ return True
|
|
|
|
|
|
|
|
|
def send_to_wework(
|