schema.sql 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. -- TrendRadar 数据库表结构
  2. -- ============================================
  3. -- 平台信息表
  4. -- 核心:id 不变,name 可变
  5. -- ============================================
  6. CREATE TABLE IF NOT EXISTS platforms (
  7. id TEXT PRIMARY KEY,
  8. name TEXT NOT NULL,
  9. is_active INTEGER DEFAULT 1,
  10. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  11. );
  12. -- ============================================
  13. -- 新闻条目表
  14. -- 以 URL + platform_id 为唯一标识,支持去重存储
  15. -- ============================================
  16. CREATE TABLE IF NOT EXISTS news_items (
  17. id INTEGER PRIMARY KEY AUTOINCREMENT,
  18. title TEXT NOT NULL,
  19. platform_id TEXT NOT NULL,
  20. rank INTEGER NOT NULL,
  21. url TEXT DEFAULT '',
  22. mobile_url TEXT DEFAULT '',
  23. first_crawl_time TEXT NOT NULL, -- 首次抓取时间
  24. last_crawl_time TEXT NOT NULL, -- 最后抓取时间
  25. crawl_count INTEGER DEFAULT 1, -- 抓取次数
  26. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  27. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  28. FOREIGN KEY (platform_id) REFERENCES platforms(id)
  29. );
  30. -- ============================================
  31. -- 标题变更历史表
  32. -- 记录同一 URL 下标题的变化
  33. -- ============================================
  34. CREATE TABLE IF NOT EXISTS title_changes (
  35. id INTEGER PRIMARY KEY AUTOINCREMENT,
  36. news_item_id INTEGER NOT NULL,
  37. old_title TEXT NOT NULL,
  38. new_title TEXT NOT NULL,
  39. changed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  40. FOREIGN KEY (news_item_id) REFERENCES news_items(id)
  41. );
  42. -- ============================================
  43. -- 排名历史表
  44. -- 记录每次抓取时的排名变化
  45. -- ============================================
  46. CREATE TABLE IF NOT EXISTS rank_history (
  47. id INTEGER PRIMARY KEY AUTOINCREMENT,
  48. news_item_id INTEGER NOT NULL,
  49. rank INTEGER NOT NULL,
  50. crawl_time TEXT NOT NULL,
  51. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  52. FOREIGN KEY (news_item_id) REFERENCES news_items(id)
  53. );
  54. -- ============================================
  55. -- 抓取记录表
  56. -- 记录每次抓取的时间和数量
  57. -- ============================================
  58. CREATE TABLE IF NOT EXISTS crawl_records (
  59. id INTEGER PRIMARY KEY AUTOINCREMENT,
  60. crawl_time TEXT NOT NULL UNIQUE,
  61. total_items INTEGER DEFAULT 0,
  62. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  63. );
  64. -- ============================================
  65. -- 抓取来源状态表
  66. -- 记录每次抓取各平台的成功/失败状态
  67. -- ============================================
  68. CREATE TABLE IF NOT EXISTS crawl_source_status (
  69. crawl_record_id INTEGER NOT NULL,
  70. platform_id TEXT NOT NULL,
  71. status TEXT NOT NULL CHECK(status IN ('success', 'failed')),
  72. PRIMARY KEY (crawl_record_id, platform_id),
  73. FOREIGN KEY (crawl_record_id) REFERENCES crawl_records(id),
  74. FOREIGN KEY (platform_id) REFERENCES platforms(id)
  75. );
  76. -- ============================================
  77. -- 推送记录表
  78. -- 用于 push_window once_per_day 功能
  79. -- 以及 ai_analysis analysis_window once_per_day 功能
  80. -- ============================================
  81. CREATE TABLE IF NOT EXISTS push_records (
  82. id INTEGER PRIMARY KEY AUTOINCREMENT,
  83. date TEXT NOT NULL UNIQUE,
  84. pushed INTEGER DEFAULT 0,
  85. push_time TEXT,
  86. report_type TEXT,
  87. ai_analyzed INTEGER DEFAULT 0,
  88. ai_analysis_time TEXT,
  89. ai_analysis_mode TEXT,
  90. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  91. );
  92. -- ============================================
  93. -- 索引定义
  94. -- ============================================
  95. -- 平台索引
  96. CREATE INDEX IF NOT EXISTS idx_news_platform ON news_items(platform_id);
  97. -- 时间索引(用于查询最新数据)
  98. CREATE INDEX IF NOT EXISTS idx_news_crawl_time ON news_items(last_crawl_time);
  99. -- 标题索引(用于标题搜索)
  100. CREATE INDEX IF NOT EXISTS idx_news_title ON news_items(title);
  101. -- URL + platform_id 唯一索引(仅对非空 URL,实现去重)
  102. CREATE UNIQUE INDEX IF NOT EXISTS idx_news_url_platform
  103. ON news_items(url, platform_id) WHERE url != '';
  104. -- 抓取状态索引
  105. CREATE INDEX IF NOT EXISTS idx_crawl_status_record ON crawl_source_status(crawl_record_id);
  106. -- 排名历史索引
  107. CREATE INDEX IF NOT EXISTS idx_rank_history_news ON rank_history(news_item_id);