schema.sql 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. -- TrendRadar 数据库表结构
  2. -- ============================================
  3. -- 平台信息表
  4. -- 核心:id 不变,name 可变
  5. -- ============================================
  6. CREATE TABLE IF NOT EXISTS platforms (
  7. id TEXT PRIMARY KEY,
  8. name TEXT NOT NULL,
  9. is_active INTEGER DEFAULT 1,
  10. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  11. );
  12. -- ============================================
  13. -- 新闻条目表
  14. -- 以 URL + platform_id 为唯一标识,支持去重存储
  15. -- ============================================
  16. CREATE TABLE IF NOT EXISTS news_items (
  17. id INTEGER PRIMARY KEY AUTOINCREMENT,
  18. title TEXT NOT NULL,
  19. platform_id TEXT NOT NULL,
  20. rank INTEGER NOT NULL,
  21. url TEXT DEFAULT '',
  22. mobile_url TEXT DEFAULT '',
  23. first_crawl_time TEXT NOT NULL, -- 首次抓取时间
  24. last_crawl_time TEXT NOT NULL, -- 最后抓取时间
  25. crawl_count INTEGER DEFAULT 1, -- 抓取次数
  26. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  27. updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  28. FOREIGN KEY (platform_id) REFERENCES platforms(id)
  29. );
  30. -- ============================================
  31. -- 标题变更历史表
  32. -- 记录同一 URL 下标题的变化
  33. -- ============================================
  34. CREATE TABLE IF NOT EXISTS title_changes (
  35. id INTEGER PRIMARY KEY AUTOINCREMENT,
  36. news_item_id INTEGER NOT NULL,
  37. old_title TEXT NOT NULL,
  38. new_title TEXT NOT NULL,
  39. changed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  40. FOREIGN KEY (news_item_id) REFERENCES news_items(id)
  41. );
  42. -- ============================================
  43. -- 排名历史表
  44. -- 记录每次抓取时的排名变化
  45. -- ============================================
  46. CREATE TABLE IF NOT EXISTS rank_history (
  47. id INTEGER PRIMARY KEY AUTOINCREMENT,
  48. news_item_id INTEGER NOT NULL,
  49. rank INTEGER NOT NULL,
  50. crawl_time TEXT NOT NULL,
  51. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  52. FOREIGN KEY (news_item_id) REFERENCES news_items(id)
  53. );
  54. -- ============================================
  55. -- 抓取记录表
  56. -- 记录每次抓取的时间和数量
  57. -- ============================================
  58. CREATE TABLE IF NOT EXISTS crawl_records (
  59. id INTEGER PRIMARY KEY AUTOINCREMENT,
  60. crawl_time TEXT NOT NULL UNIQUE,
  61. total_items INTEGER DEFAULT 0,
  62. created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
  63. );
  64. -- ============================================
  65. -- 抓取来源状态表
  66. -- 记录每次抓取各平台的成功/失败状态
  67. -- ============================================
  68. CREATE TABLE IF NOT EXISTS crawl_source_status (
  69. crawl_record_id INTEGER NOT NULL,
  70. platform_id TEXT NOT NULL,
  71. status TEXT NOT NULL CHECK(status IN ('success', 'failed')),
  72. PRIMARY KEY (crawl_record_id, platform_id),
  73. FOREIGN KEY (crawl_record_id) REFERENCES crawl_records(id),
  74. FOREIGN KEY (platform_id) REFERENCES platforms(id)
  75. );
  76. -- ============================================
  77. -- 时间段执行记录表
  78. -- 记录每天每个时间段在各 action 维度的执行状态(用于 once 功能)
  79. -- 替代旧的 push_records 表
  80. -- ============================================
  81. CREATE TABLE IF NOT EXISTS period_executions (
  82. id INTEGER PRIMARY KEY AUTOINCREMENT,
  83. execution_date TEXT NOT NULL, -- YYYY-MM-DD
  84. period_key TEXT NOT NULL, -- period 的稳定 key
  85. action TEXT NOT NULL, -- analyze | push
  86. executed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
  87. UNIQUE(execution_date, period_key, action)
  88. );
  89. -- ============================================
  90. -- 索引定义
  91. -- ============================================
  92. -- 平台索引
  93. CREATE INDEX IF NOT EXISTS idx_news_platform ON news_items(platform_id);
  94. -- 时间索引(用于查询最新数据)
  95. CREATE INDEX IF NOT EXISTS idx_news_crawl_time ON news_items(last_crawl_time);
  96. -- 标题索引(用于标题搜索)
  97. CREATE INDEX IF NOT EXISTS idx_news_title ON news_items(title);
  98. -- URL + platform_id 唯一索引(仅对非空 URL,实现去重)
  99. CREATE UNIQUE INDEX IF NOT EXISTS idx_news_url_platform
  100. ON news_items(url, platform_id) WHERE url != '';
  101. -- 抓取状态索引
  102. CREATE INDEX IF NOT EXISTS idx_crawl_status_record ON crawl_source_status(crawl_record_id);
  103. -- 排名历史索引
  104. CREATE INDEX IF NOT EXISTS idx_rank_history_news ON rank_history(news_item_id);
  105. -- 时间段执行记录索引
  106. CREATE INDEX IF NOT EXISTS idx_period_exec_lookup
  107. ON period_executions(execution_date, period_key, action);