date_parser.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507
"""
Date parsing utilities.

Parses natural-language date expressions in several formats, covering both
relative dates and absolute dates.
"""
  5. import re
  6. from datetime import datetime, timedelta
  7. from typing import Tuple, Dict, Optional
  8. from .errors import InvalidParameterError
  9. class DateParser:
  10. """日期解析器类"""
  11. # 中文日期映射
  12. CN_DATE_MAPPING = {
  13. "今天": 0,
  14. "昨天": 1,
  15. "前天": 2,
  16. "大前天": 3,
  17. }
  18. # 英文日期映射
  19. EN_DATE_MAPPING = {
  20. "today": 0,
  21. "yesterday": 1,
  22. }
  23. # 日期范围表达式(用于 resolve_date_range_expression)
  24. RANGE_EXPRESSIONS = {
  25. # 中文表达式
  26. "今天": "today",
  27. "昨天": "yesterday",
  28. "本周": "this_week",
  29. "这周": "this_week",
  30. "当前周": "this_week",
  31. "上周": "last_week",
  32. "本月": "this_month",
  33. "这个月": "this_month",
  34. "当前月": "this_month",
  35. "上月": "last_month",
  36. "上个月": "last_month",
  37. "最近3天": "last_3_days",
  38. "近3天": "last_3_days",
  39. "最近7天": "last_7_days",
  40. "近7天": "last_7_days",
  41. "最近一周": "last_7_days",
  42. "过去一周": "last_7_days",
  43. "最近14天": "last_14_days",
  44. "近14天": "last_14_days",
  45. "最近两周": "last_14_days",
  46. "过去两周": "last_14_days",
  47. "最近30天": "last_30_days",
  48. "近30天": "last_30_days",
  49. "最近一个月": "last_30_days",
  50. "过去一个月": "last_30_days",
  51. # 英文表达式
  52. "today": "today",
  53. "yesterday": "yesterday",
  54. "this week": "this_week",
  55. "current week": "this_week",
  56. "last week": "last_week",
  57. "this month": "this_month",
  58. "current month": "this_month",
  59. "last month": "last_month",
  60. "last 3 days": "last_3_days",
  61. "past 3 days": "last_3_days",
  62. "last 7 days": "last_7_days",
  63. "past 7 days": "last_7_days",
  64. "past week": "last_7_days",
  65. "last 14 days": "last_14_days",
  66. "past 14 days": "last_14_days",
  67. "last 30 days": "last_30_days",
  68. "past 30 days": "last_30_days",
  69. "past month": "last_30_days",
  70. }
  71. # 星期映射
  72. WEEKDAY_CN = {
  73. "一": 0, "二": 1, "三": 2, "四": 3,
  74. "五": 4, "六": 5, "日": 6, "天": 6
  75. }
  76. WEEKDAY_EN = {
  77. "monday": 0, "tuesday": 1, "wednesday": 2, "thursday": 3,
  78. "friday": 4, "saturday": 5, "sunday": 6
  79. }
  80. @staticmethod
  81. def parse_date_query(date_query: str) -> datetime:
  82. """
  83. 解析日期查询字符串
  84. 支持的格式:
  85. - 相对日期(中文):今天、昨天、前天、大前天、N天前
  86. - 相对日期(英文):today、yesterday、N days ago
  87. - 星期(中文):上周一、上周二、本周三
  88. - 星期(英文):last monday、this friday
  89. - 绝对日期:2025-10-10、10月10日、2025年10月10日
  90. Args:
  91. date_query: 日期查询字符串
  92. Returns:
  93. datetime对象
  94. Raises:
  95. InvalidParameterError: 日期格式无法识别
  96. Examples:
  97. >>> DateParser.parse_date_query("今天")
  98. datetime(2025, 10, 11)
  99. >>> DateParser.parse_date_query("昨天")
  100. datetime(2025, 10, 10)
  101. >>> DateParser.parse_date_query("3天前")
  102. datetime(2025, 10, 8)
  103. >>> DateParser.parse_date_query("2025-10-10")
  104. datetime(2025, 10, 10)
  105. """
  106. if not date_query or not isinstance(date_query, str):
  107. raise InvalidParameterError(
  108. "日期查询字符串不能为空",
  109. suggestion="请提供有效的日期查询,如:今天、昨天、2025-10-10"
  110. )
  111. date_query = date_query.strip().lower()
  112. # 1. 尝试解析中文常用相对日期
  113. if date_query in DateParser.CN_DATE_MAPPING:
  114. days_ago = DateParser.CN_DATE_MAPPING[date_query]
  115. return datetime.now() - timedelta(days=days_ago)
  116. # 2. 尝试解析英文常用相对日期
  117. if date_query in DateParser.EN_DATE_MAPPING:
  118. days_ago = DateParser.EN_DATE_MAPPING[date_query]
  119. return datetime.now() - timedelta(days=days_ago)
  120. # 3. 尝试解析 "N天前" 或 "N days ago"
  121. cn_days_ago_match = re.match(r'(\d+)\s*天前', date_query)
  122. if cn_days_ago_match:
  123. days = int(cn_days_ago_match.group(1))
  124. if days > 365:
  125. raise InvalidParameterError(
  126. f"天数过大: {days}天",
  127. suggestion="请使用小于365天的相对日期或使用绝对日期"
  128. )
  129. return datetime.now() - timedelta(days=days)
  130. en_days_ago_match = re.match(r'(\d+)\s*days?\s+ago', date_query)
  131. if en_days_ago_match:
  132. days = int(en_days_ago_match.group(1))
  133. if days > 365:
  134. raise InvalidParameterError(
  135. f"天数过大: {days}天",
  136. suggestion="请使用小于365天的相对日期或使用绝对日期"
  137. )
  138. return datetime.now() - timedelta(days=days)
  139. # 4. 尝试解析星期(中文):上周一、本周三
  140. cn_weekday_match = re.match(r'(上|本)周([一二三四五六日天])', date_query)
  141. if cn_weekday_match:
  142. week_type = cn_weekday_match.group(1) # 上 或 本
  143. weekday_str = cn_weekday_match.group(2)
  144. target_weekday = DateParser.WEEKDAY_CN[weekday_str]
  145. return DateParser._get_date_by_weekday(target_weekday, week_type == "上")
  146. # 5. 尝试解析星期(英文):last monday、this friday
  147. en_weekday_match = re.match(r'(last|this)\s+(monday|tuesday|wednesday|thursday|friday|saturday|sunday)', date_query)
  148. if en_weekday_match:
  149. week_type = en_weekday_match.group(1) # last 或 this
  150. weekday_str = en_weekday_match.group(2)
  151. target_weekday = DateParser.WEEKDAY_EN[weekday_str]
  152. return DateParser._get_date_by_weekday(target_weekday, week_type == "last")
  153. # 6. 尝试解析绝对日期:YYYY-MM-DD
  154. iso_date_match = re.match(r'(\d{4})-(\d{1,2})-(\d{1,2})', date_query)
  155. if iso_date_match:
  156. year = int(iso_date_match.group(1))
  157. month = int(iso_date_match.group(2))
  158. day = int(iso_date_match.group(3))
  159. try:
  160. return datetime(year, month, day)
  161. except ValueError as e:
  162. raise InvalidParameterError(
  163. f"无效的日期: {date_query}",
  164. suggestion=f"日期值错误: {str(e)}"
  165. )
  166. # 7. 尝试解析中文日期:MM月DD日 或 YYYY年MM月DD日
  167. cn_date_match = re.match(r'(?:(\d{4})年)?(\d{1,2})月(\d{1,2})日', date_query)
  168. if cn_date_match:
  169. year_str = cn_date_match.group(1)
  170. month = int(cn_date_match.group(2))
  171. day = int(cn_date_match.group(3))
  172. # 如果没有年份,使用当前年份
  173. if year_str:
  174. year = int(year_str)
  175. else:
  176. year = datetime.now().year
  177. # 如果月份大于当前月份,说明是去年
  178. current_month = datetime.now().month
  179. if month > current_month:
  180. year -= 1
  181. try:
  182. return datetime(year, month, day)
  183. except ValueError as e:
  184. raise InvalidParameterError(
  185. f"无效的日期: {date_query}",
  186. suggestion=f"日期值错误: {str(e)}"
  187. )
  188. # 8. 尝试解析斜杠格式:YYYY/MM/DD 或 MM/DD
  189. slash_date_match = re.match(r'(?:(\d{4})/)?(\d{1,2})/(\d{1,2})', date_query)
  190. if slash_date_match:
  191. year_str = slash_date_match.group(1)
  192. month = int(slash_date_match.group(2))
  193. day = int(slash_date_match.group(3))
  194. if year_str:
  195. year = int(year_str)
  196. else:
  197. year = datetime.now().year
  198. current_month = datetime.now().month
  199. if month > current_month:
  200. year -= 1
  201. try:
  202. return datetime(year, month, day)
  203. except ValueError as e:
  204. raise InvalidParameterError(
  205. f"无效的日期: {date_query}",
  206. suggestion=f"日期值错误: {str(e)}"
  207. )
  208. # 如果所有格式都不匹配
  209. raise InvalidParameterError(
  210. f"无法识别的日期格式: {date_query}",
  211. suggestion=(
  212. "支持的格式:\n"
  213. "- 相对日期: 今天、昨天、前天、3天前、today、yesterday、3 days ago\n"
  214. "- 星期: 上周一、本周三、last monday、this friday\n"
  215. "- 绝对日期: 2025-10-10、10月10日、2025年10月10日"
  216. )
  217. )
  218. @staticmethod
  219. def _get_date_by_weekday(target_weekday: int, is_last_week: bool) -> datetime:
  220. """
  221. 根据星期几获取日期
  222. Args:
  223. target_weekday: 目标星期 (0=周一, 6=周日)
  224. is_last_week: 是否是上周
  225. Returns:
  226. datetime对象
  227. """
  228. today = datetime.now()
  229. current_weekday = today.weekday()
  230. # 计算天数差
  231. if is_last_week:
  232. # 上周的某一天
  233. days_diff = current_weekday - target_weekday + 7
  234. else:
  235. # 本周的某一天
  236. days_diff = current_weekday - target_weekday
  237. if days_diff < 0:
  238. days_diff += 7
  239. return today - timedelta(days=days_diff)
  240. @staticmethod
  241. def format_date_folder(date: datetime) -> str:
  242. """
  243. 将日期格式化为文件夹名称
  244. Args:
  245. date: datetime对象
  246. Returns:
  247. 文件夹名称,格式: YYYY-MM-DD
  248. Examples:
  249. >>> DateParser.format_date_folder(datetime(2025, 10, 11))
  250. '2025-10-11'
  251. """
  252. return date.strftime("%Y-%m-%d")
  253. @staticmethod
  254. def validate_date_not_future(date: datetime) -> None:
  255. """
  256. 验证日期不在未来
  257. Args:
  258. date: 待验证的日期
  259. Raises:
  260. InvalidParameterError: 日期在未来
  261. """
  262. if date.date() > datetime.now().date():
  263. raise InvalidParameterError(
  264. f"不能查询未来的日期: {date.strftime('%Y-%m-%d')}",
  265. suggestion="请使用今天或过去的日期"
  266. )
  267. @staticmethod
  268. def validate_date_not_too_old(date: datetime, max_days: int = 365) -> None:
  269. """
  270. 验证日期不太久远
  271. Args:
  272. date: 待验证的日期
  273. max_days: 最大天数
  274. Raises:
  275. InvalidParameterError: 日期太久远
  276. """
  277. days_ago = (datetime.now().date() - date.date()).days
  278. if days_ago > max_days:
  279. raise InvalidParameterError(
  280. f"日期太久远: {date.strftime('%Y-%m-%d')} ({days_ago}天前)",
  281. suggestion=f"请查询{max_days}天内的数据"
  282. )
  283. @staticmethod
  284. def resolve_date_range_expression(expression: str) -> Dict:
  285. """
  286. 将自然语言日期表达式解析为标准日期范围
  287. 这是专门为 MCP 工具设计的方法,用于在服务器端解析日期表达式,
  288. 避免 AI 模型自己计算日期导致的不一致问题。
  289. Args:
  290. expression: 自然语言日期表达式,支持:
  291. - 单日: "今天", "昨天", "today", "yesterday"
  292. - 本周/上周: "本周", "上周", "this week", "last week"
  293. - 本月/上月: "本月", "上月", "this month", "last month"
  294. - 最近N天: "最近7天", "最近30天", "last 7 days", "last 30 days"
  295. - 动态N天: "最近5天", "last 10 days"
  296. Returns:
  297. 解析结果字典:
  298. {
  299. "success": True,
  300. "expression": "本周",
  301. "normalized": "this_week",
  302. "date_range": {
  303. "start": "2025-11-18",
  304. "end": "2025-11-24"
  305. },
  306. "current_date": "2025-11-26",
  307. "description": "本周(周一到周日)"
  308. }
  309. Raises:
  310. InvalidParameterError: 无法识别的日期表达式
  311. Examples:
  312. >>> DateParser.resolve_date_range_expression("本周")
  313. {"success": True, "date_range": {"start": "2025-11-18", "end": "2025-11-24"}, ...}
  314. >>> DateParser.resolve_date_range_expression("最近7天")
  315. {"success": True, "date_range": {"start": "2025-11-20", "end": "2025-11-26"}, ...}
  316. """
  317. if not expression or not isinstance(expression, str):
  318. raise InvalidParameterError(
  319. "日期表达式不能为空",
  320. suggestion="请提供有效的日期表达式,如:本周、最近7天、last week"
  321. )
  322. expression_lower = expression.strip().lower()
  323. today = datetime.now()
  324. today_str = today.strftime("%Y-%m-%d")
  325. # 1. 尝试匹配预定义表达式
  326. normalized = DateParser.RANGE_EXPRESSIONS.get(expression_lower)
  327. # 2. 尝试匹配动态 "最近N天" / "last N days" 模式
  328. if not normalized:
  329. # 中文: 最近N天
  330. cn_match = re.match(r'最近(\d+)天', expression_lower)
  331. if cn_match:
  332. days = int(cn_match.group(1))
  333. normalized = f"last_{days}_days"
  334. # 英文: last N days
  335. en_match = re.match(r'(?:last|past)\s+(\d+)\s+days?', expression_lower)
  336. if en_match:
  337. days = int(en_match.group(1))
  338. normalized = f"last_{days}_days"
  339. if not normalized:
  340. # 提供支持的表达式列表
  341. supported_cn = ["今天", "昨天", "本周", "上周", "本月", "上月",
  342. "最近7天", "最近30天", "最近N天"]
  343. supported_en = ["today", "yesterday", "this week", "last week",
  344. "this month", "last month", "last 7 days", "last N days"]
  345. raise InvalidParameterError(
  346. f"无法识别的日期表达式: {expression}",
  347. suggestion=f"支持的表达式:\n中文: {', '.join(supported_cn)}\n英文: {', '.join(supported_en)}"
  348. )
  349. # 3. 根据 normalized 类型计算日期范围
  350. start_date, end_date, description = DateParser._calculate_date_range(
  351. normalized, today
  352. )
  353. return {
  354. "success": True,
  355. "expression": expression,
  356. "normalized": normalized,
  357. "date_range": {
  358. "start": start_date.strftime("%Y-%m-%d"),
  359. "end": end_date.strftime("%Y-%m-%d")
  360. },
  361. "current_date": today_str,
  362. "description": description
  363. }
  364. @staticmethod
  365. def _calculate_date_range(
  366. normalized: str,
  367. today: datetime
  368. ) -> Tuple[datetime, datetime, str]:
  369. """
  370. 根据标准化的日期类型计算实际日期范围
  371. Args:
  372. normalized: 标准化的日期类型
  373. today: 当前日期
  374. Returns:
  375. (start_date, end_date, description) 元组
  376. """
  377. # 单日类型
  378. if normalized == "today":
  379. return today, today, "今天"
  380. if normalized == "yesterday":
  381. yesterday = today - timedelta(days=1)
  382. return yesterday, yesterday, "昨天"
  383. # 本周(周一到周日)
  384. if normalized == "this_week":
  385. # 计算本周一
  386. weekday = today.weekday() # 0=周一, 6=周日
  387. start = today - timedelta(days=weekday)
  388. end = start + timedelta(days=6)
  389. # 如果本周还没结束,end 不能超过今天
  390. if end > today:
  391. end = today
  392. return start, end, f"本周(周一到周日,{start.strftime('%m-%d')} 至 {end.strftime('%m-%d')})"
  393. # 上周(上周一到上周日)
  394. if normalized == "last_week":
  395. weekday = today.weekday()
  396. # 本周一
  397. this_monday = today - timedelta(days=weekday)
  398. # 上周一
  399. start = this_monday - timedelta(days=7)
  400. end = start + timedelta(days=6)
  401. return start, end, f"上周({start.strftime('%m-%d')} 至 {end.strftime('%m-%d')})"
  402. # 本月(本月1日到今天)
  403. if normalized == "this_month":
  404. start = today.replace(day=1)
  405. return start, today, f"本月({start.strftime('%m-%d')} 至 {today.strftime('%m-%d')})"
  406. # 上月(上月1日到上月最后一天)
  407. if normalized == "last_month":
  408. # 上月最后一天 = 本月1日 - 1天
  409. first_of_this_month = today.replace(day=1)
  410. end = first_of_this_month - timedelta(days=1)
  411. start = end.replace(day=1)
  412. return start, end, f"上月({start.strftime('%Y-%m-%d')} 至 {end.strftime('%Y-%m-%d')})"
  413. # 最近N天 (last_N_days 格式)
  414. match = re.match(r'last_(\d+)_days', normalized)
  415. if match:
  416. days = int(match.group(1))
  417. start = today - timedelta(days=days - 1) # 包含今天,所以是 days-1
  418. return start, today, f"最近{days}天({start.strftime('%m-%d')} 至 {today.strftime('%m-%d')})"
  419. # 兜底:返回今天
  420. return today, today, "今天(默认)"
  421. @staticmethod
  422. def get_supported_expressions() -> Dict[str, list]:
  423. """
  424. 获取支持的日期表达式列表
  425. Returns:
  426. 分类的表达式列表
  427. """
  428. return {
  429. "单日": ["今天", "昨天", "today", "yesterday"],
  430. "周": ["本周", "上周", "this week", "last week"],
  431. "月": ["本月", "上月", "this month", "last month"],
  432. "最近N天": ["最近3天", "最近7天", "最近14天", "最近30天",
  433. "last 3 days", "last 7 days", "last 14 days", "last 30 days"],
  434. "动态天数": ["最近N天", "last N days"]
  435. }