discourse/script/import_scripts/mbox/support/database.rb
Sam Saffron 30990006a9 DEV: enable frozen string literal on all files
This reduces chances of errors where consumers of strings mutate inputs
and reduces memory usage of the app.

Test suite passes now, but there may be some stuff left, so we will run
a few sites on a branch prior to merging
2019-05-13 09:31:32 +08:00

307 lines
8.2 KiB
Ruby

# frozen_string_literal: true
require 'sqlite3'
module ImportScripts::Mbox
class Database
SCHEMA_VERSION = 2
def initialize(directory, batch_size)
@db = SQLite3::Database.new("#{directory}/index.db", results_as_hash: true)
@batch_size = batch_size
configure_database
upgrade_schema_version
create_table_for_categories
create_table_for_imported_files
create_table_for_emails
create_table_for_replies
create_table_for_users
end
def transaction
@db.transaction
yield self
@db.commit
rescue
@db.rollback
end
def insert_category(category)
@db.execute(<<-SQL, category)
INSERT OR REPLACE INTO category (name, description)
VALUES (:name, :description)
SQL
end
def insert_imported_file(imported_file)
@db.execute(<<-SQL, imported_file)
INSERT OR REPLACE INTO imported_file (category, filename, checksum)
VALUES (:category, :filename, :checksum)
SQL
end
def insert_email(email)
@db.execute(<<-SQL, email)
INSERT OR REPLACE INTO email (msg_id, from_email, from_name, subject,
email_date, raw_message, body, elided, format, attachment_count, charset,
category, filename, first_line_number, last_line_number, index_duration)
VALUES (:msg_id, :from_email, :from_name, :subject,
:email_date, :raw_message, :body, :elided, :format, :attachment_count, :charset,
:category, :filename, :first_line_number, :last_line_number, :index_duration)
SQL
end
def insert_replies(msg_id, reply_message_ids)
sql = <<-SQL
INSERT OR REPLACE INTO reply (msg_id, in_reply_to)
VALUES (:msg_id, :in_reply_to)
SQL
@db.prepare(sql) do |stmt|
reply_message_ids.each do |in_reply_to|
stmt.execute(msg_id, in_reply_to)
end
end
end
def update_in_reply_to_of_emails
@db.execute <<-SQL
UPDATE email
SET in_reply_to = (
SELECT e.msg_id
FROM reply r
JOIN email e ON (r.in_reply_to = e.msg_id)
WHERE r.msg_id = email.msg_id
ORDER BY e.email_date DESC
LIMIT 1
)
SQL
end
def update_in_reply_to_by_email_subject
@db.execute <<-SQL
UPDATE email
SET in_reply_to = NULLIF((
SELECT e.msg_id
FROM email e
JOIN email_order o ON (e.msg_id = o.msg_id)
WHERE e.subject = email.subject
ORDER BY o.ROWID
LIMIT 1
), msg_id)
SQL
end
def sort_emails_by_date_and_reply_level
@db.execute 'DELETE FROM email_order'
@db.execute <<-SQL
WITH RECURSIVE
messages(msg_id, level, email_date, in_reply_to) AS (
SELECT msg_id, 0 AS level, email_date, in_reply_to
FROM email
WHERE in_reply_to IS NULL
UNION ALL
SELECT e.msg_id, m.level + 1, e.email_date, e.in_reply_to
FROM email e
JOIN messages m ON e.in_reply_to = m.msg_id
)
INSERT INTO email_order (msg_id)
SELECT c.msg_id
FROM messages c
LEFT OUTER JOIN messages p ON (c.in_reply_to = p.msg_id)
ORDER BY MAX(c.email_date, p.email_date), c.level, c.email_date, c.msg_id
SQL
end
def sort_emails_by_subject
@db.execute 'DELETE FROM email_order'
@db.execute <<-SQL
INSERT INTO email_order (msg_id)
SELECT msg_id
FROM email
ORDER BY subject, filename, ROWID
SQL
end
def fill_users_from_emails
@db.execute 'DELETE FROM user'
@db.execute <<-SQL
INSERT INTO user (email, name, date_of_first_message)
SELECT from_email, MIN(from_name) AS from_name, MIN(email_date)
FROM email
WHERE from_email IS NOT NULL AND email_date IS NOT NULL
GROUP BY from_email
ORDER BY from_email
SQL
end
def fetch_imported_files(category)
@db.execute(<<-SQL, category)
SELECT filename, checksum
FROM imported_file
WHERE category = :category
SQL
end
def fetch_categories
@db.execute <<-SQL
SELECT name, description
FROM category
ORDER BY name
SQL
end
def count_users
@db.get_first_value <<-SQL
SELECT COUNT(*)
FROM user
SQL
end
def fetch_users(last_email)
rows = @db.execute(<<-SQL, last_email)
SELECT email, name, date_of_first_message
FROM user
WHERE email > :last_email
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'email')
end
def count_messages
@db.get_first_value <<-SQL
SELECT COUNT(*)
FROM email
SQL
end
def fetch_messages(last_row_id)
rows = @db.execute(<<-SQL, last_row_id)
SELECT o.ROWID, e.msg_id, from_email, subject, email_date, in_reply_to,
raw_message, body, elided, format, attachment_count, category
FROM email e
JOIN email_order o USING (msg_id)
WHERE o.ROWID > :last_row_id
ORDER BY o.ROWID
LIMIT #{@batch_size}
SQL
add_last_column_value(rows, 'rowid')
end
private
def configure_database
@db.execute 'PRAGMA journal_mode = OFF'
@db.execute 'PRAGMA locking_mode = EXCLUSIVE'
end
def upgrade_schema_version
current_version = @db.get_first_value("PRAGMA user_version")
case current_version
when 1
@db.execute "ALTER TABLE email ADD COLUMN index_duration REAL"
end
@db.execute "PRAGMA user_version = #{SCHEMA_VERSION}"
end
def create_table_for_categories
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS category (
name TEXT NOT NULL PRIMARY KEY,
description TEXT
)
SQL
end
def create_table_for_imported_files
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS imported_file (
category TEXT NOT NULL,
filename TEXT NOT NULL,
checksum TEXT NOT NULL,
PRIMARY KEY (category, filename),
FOREIGN KEY(category) REFERENCES category(name)
)
SQL
end
def create_table_for_emails
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS email (
msg_id TEXT NOT NULL PRIMARY KEY,
from_email TEXT,
from_name TEXT,
subject TEXT,
in_reply_to TEXT,
email_date DATETIME,
raw_message TEXT,
body TEXT,
elided TEXT,
format INTEGER,
attachment_count INTEGER NOT NULL DEFAULT 0,
charset TEXT,
category TEXT NOT NULL,
filename TEXT NOT NULL,
first_line_number INTEGER,
last_line_number INTEGER,
index_duration REAL,
FOREIGN KEY(category) REFERENCES category(name)
)
SQL
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_from ON email (from_email)'
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_subject ON email (subject)'
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_in_reply_to ON email (in_reply_to)'
@db.execute 'CREATE INDEX IF NOT EXISTS email_by_date ON email (email_date)'
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS email_order (
msg_id TEXT NOT NULL PRIMARY KEY
)
SQL
end
def create_table_for_replies
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS reply (
msg_id TEXT NOT NULL,
in_reply_to TEXT NOT NULL,
PRIMARY KEY (msg_id, in_reply_to),
FOREIGN KEY(msg_id) REFERENCES email(msg_id)
)
SQL
@db.execute 'CREATE INDEX IF NOT EXISTS reply_by_in_reply_to ON reply (in_reply_to)'
end
def create_table_for_users
@db.execute <<-SQL
CREATE TABLE IF NOT EXISTS user (
email TEXT NOT NULL PRIMARY KEY,
name TEXT,
date_of_first_message DATETIME NOT NULL
)
SQL
end
def add_last_column_value(rows, *last_columns)
return rows if last_columns.empty?
result = [rows]
last_row = rows.last
last_columns.each { |column| result.push(last_row ? last_row[column] : nil) }
result
end
end
end