DEV: Prepare new structure for migrations-tooling ()

* Moves existing files around. All essential scripts are in `migrations/bin`, and non-essential scripts like benchmarks are in `migrations/scripts`
* Dependabot configuration for migrations-tooling (disabled for now)
* Updates test configuration for migrations-tooling
* Shorter configuration for intermediate DB for now. We will add the rest table by table.
* Adds a couple of benchmark scripts
* RSpec setup specifically for migrations-tooling and the first tests
* Adds sorting/formatting to the `generate_schema` script

@ -81,3 +81,11 @@ updates:
types:
patterns:
- "@types/*"
# - package-ecosystem: "bundler"
# directory: "migrations/config/gemfiles/convert"
# schedule:
# interval: "weekly"
# day: "wednesday"
# time: "10:00"
# timezone: "Europe/Vienna"
# versioning-strategy: "increase"

@ -23,7 +23,7 @@ permissions:
jobs:
tests:
if: github.event_name == 'pull_request' || github.repository != 'discourse/discourse-private-mirror'
name: Ruby ${{ matrix.ruby }}
name: Tests with Ruby ${{ matrix.ruby }}
runs-on: 'ubuntu-latest'
container: discourse/discourse_test:slim
timeout-minutes: 20
@ -73,7 +73,12 @@ jobs:
uses: actions/cache@v4
with:
path: vendor/bundle
key: ${{ runner.os }}-${{ steps.container-envs.outputs.ruby_version }}-${{ steps.container-envs.outputs.debian_release }}-gem-${{ hashFiles('**/Gemfile.lock') }}
key: >-
${{ runner.os }}-
${{ steps.container-envs.outputs.ruby_version }}-
${{ steps.container-envs.outputs.debian_release }}-
${{ hashFiles('**/Gemfile.lock') }}-
${{ hashFiles('migrations/config/gemfiles/**/Gemfile') }}
- name: Setup gems
run: |
@ -82,7 +87,7 @@ jobs:
bundle config --local deployment true
bundle config --local without development
bundle install --jobs $(($(nproc) - 1))
bundle clean
# don't call `bundle clean`, we need the gems for the migrations
- name: Get yarn cache directory
id: yarn-cache-dir
@ -131,11 +136,40 @@ jobs:
if: steps.app-cache.outputs.cache-hit != 'true'
run: rm -rf tmp/app-cache/uploads && cp -r public/uploads tmp/app-cache/uploads
- name: Check core database drift
run: |
mkdir /tmp/intermediate_db
./migrations/scripts/schema_generator /tmp/intermediate_db/base_migration.sql
diff -u migrations/common/intermediate_db_schema/000_base_schema.sql /tmp/intermediate_db/base_migration.sql
# - name: Check core database drift
# run: |
# mkdir /tmp/intermediate_db
# ./migrations/scripts/schema_generator /tmp/intermediate_db/base_migration.sql
# diff -u migrations/common/intermediate_db_schema/000_base_schema.sql /tmp/intermediate_db/base_migration.sql
- name: RSpec
run: bin/rspec migrations/spec/
run: bin/rspec --default-path migrations/spec
runtime:
if: github.event_name == 'pull_request' || github.repository != 'discourse/discourse-private-mirror'
name: Runs on ${{ matrix.os }}, Ruby ${{ matrix.ruby }}
timeout-minutes: 20
strategy:
fail-fast: false
matrix:
os: ["ubuntu-latest", "macos-latest"]
ruby: ["3.2", "3.3"]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 1
- name: Setup Ruby
uses: ruby/setup-ruby@v1
with:
ruby-version: ${{ matrix.ruby }}
bundler-cache: true
- name: Run converter
working-directory: migrations
run: bin/convert version

@ -1,2 +1,4 @@
!/db/schema/*.sql
tmp/*
!/common/intermediate_db_schema/*.sql
Gemfile.lock

@ -0,0 +1,11 @@
# Migrations Tooling
## Development
### Running tests
Execute `rspec` from the root of the project:
```bash
bin/rspec --default-path migrations/spec
```

migrations/bin/convert (new executable file)

@ -0,0 +1,32 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
require_relative "../lib/migrations"
module Migrations
load_gemfiles("common")
configure_zeitwerk("lib/common", "lib/converters")
module Convert
class CLI < Thor
desc "execute", "Run the conversion"
def execute
FileUtils.mkdir_p("/tmp/converter")
::Migrations::IntermediateDatabaseMigrator.reset!("/tmp/converter/intermediate.db")
::Migrations::IntermediateDatabaseMigrator.migrate("/tmp/converter/intermediate.db")
# require_relative "converters/pepper/main"
end
desc "version", "Print the version"
def version
puts "0.0.1"
end
end
end
end
Migrations::Convert::CLI.start(ARGV)

@ -1,14 +1,19 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
puts "Loading application..."
require_relative "../config/environment"
require_relative "../lib/migrations"
module Migrations
load_rails_environment
load_gemfiles("common")
configure_zeitwerk("lib/common")
module Import
class << self
def run
puts "Importing into Discourse #{Discourse::VERSION::STRING}"
puts "Extralite SQLite version: #{Extralite.sqlite3_version}"
end
end
end

@ -1,375 +0,0 @@
/*
This file is auto-generated from the Discourse core database schema. Instead of editing it directly,
please update the `schema.yml` configuration file and re-run the `generate_schema` script to update it.
*/
CREATE TABLE badges (
id INTEGER NOT NULL PRIMARY KEY,
name TEXT NOT NULL,
description TEXT,
badge_type_id INTEGER NOT NULL,
created_at DATETIME NOT NULL,
multiple_grant BOOLEAN NOT NULL,
query TEXT,
long_description TEXT,
image_upload_id INTEGER,
bage_group TEXT
);
CREATE TABLE categories (
id INTEGER NOT NULL PRIMARY KEY,
name TEXT NOT NULL,
color TEXT NOT NULL,
created_at DATETIME NOT NULL,
slug TEXT NOT NULL,
description TEXT,
text_color TEXT NOT NULL,
read_restricted BOOLEAN NOT NULL,
position INTEGER,
parent_category_id INTEGER,
about_topic_title TEXT,
old_relative_url TEXT,
existing_id INTEGER,
permissions JSON_TEXT,
logo_upload_id TEXT,
tag_group_ids JSON_TEXT
);
CREATE TABLE category_custom_fields (
category_id INTEGER NOT NULL,
name TEXT NOT NULL,
value TEXT,
PRIMARY KEY (category_id, name)
);
CREATE TABLE config (
name TEXT NOT NULL PRIMARY KEY,
value TEXT NOT NULL
);
CREATE TABLE group_members (
group_id INTEGER,
user_id INTEGER,
owner BOOLEAN,
PRIMARY KEY (group_id, user_id)
);
CREATE TABLE groups (
id INTEGER NOT NULL PRIMARY KEY,
name TEXT NOT NULL,
full_name TEXT,
visibility_level INTEGER NOT NULL,
messageable_level INTEGER,
mentionable_level INTEGER,
members_visibility_level INTEGER NOT NULL,
description TEXT
);
CREATE TABLE likes (
post_id INTEGER NOT NULL,
user_id INTEGER NOT NULL,
created_at DATETIME NOT NULL,
PRIMARY KEY (user_id, post_id)
);
CREATE TABLE log_entries (
created_at DATETIME NOT NULL,
type TEXT NOT NULL,
message TEXT NOT NULL,
exception TEXT,
details TEXT
);
CREATE TABLE muted_users (
user_id INTEGER NOT NULL,
muted_user_id INTEGER NOT NULL,
PRIMARY KEY (user_id, muted_user_id)
);
CREATE TABLE permalink_normalizations (
normalization TEXT NOT NULL PRIMARY KEY
);
CREATE TABLE poll_options (
id INTEGER NOT NULL PRIMARY KEY,
poll_id INTEGER NOT NULL,
created_at DATETIME,
text TEXT NOT NULL,
position INTEGER
);
CREATE TABLE poll_votes (
poll_option_id INTEGER NOT NULL,
user_id INTEGER NOT NULL,
created_at DATETIME,
PRIMARY KEY (poll_option_id, user_id)
);
CREATE TABLE polls (
id INTEGER NOT NULL PRIMARY KEY,
post_id INTEGER,
name TEXT NOT NULL,
close_at DATETIME,
type INTEGER NOT NULL,
status INTEGER NOT NULL,
results INTEGER NOT NULL,
visibility INTEGER NOT NULL,
min INTEGER,
max INTEGER,
step INTEGER,
anonymous_voters INTEGER,
created_at DATETIME NOT NULL,
chart_type INTEGER NOT NULL,
groups TEXT,
title TEXT
);
CREATE TABLE post_custom_fields (
post_id INTEGER NOT NULL,
name TEXT NOT NULL,
value TEXT,
PRIMARY KEY (post_id, name)
);
CREATE TABLE posts (
id INTEGER NOT NULL PRIMARY KEY,
user_id INTEGER,
topic_id INTEGER NOT NULL,
post_number INTEGER NOT NULL,
raw TEXT NOT NULL,
created_at DATETIME NOT NULL,
like_count INTEGER NOT NULL,
reply_to_post_id TEXT,
original_raw TEXT,
upload_ids JSON_TEXT,
old_relative_url TEXT,
accepted_answer BOOLEAN,
small_action TEXT,
whisper BOOLEAN,
placeholders JSON_TEXT
);
CREATE INDEX posts_by_topic_post_number ON posts (topic_id, post_number);
CREATE TABLE schema_migrations (
path TEXT NOT NULL PRIMARY KEY,
created_at DATETIME
);
CREATE TABLE site_settings (
name TEXT NOT NULL,
value TEXT,
action TEXT
);
CREATE TABLE tag_groups (
id INTEGER NOT NULL PRIMARY KEY,
name TEXT NOT NULL
);
CREATE TABLE tag_users (
tag_id INTEGER NOT NULL,
user_id INTEGER NOT NULL,
notification_level INTEGER NOT NULL,
PRIMARY KEY (tag_id, user_id)
);
CREATE TABLE tags (
id INTEGER NOT NULL PRIMARY KEY,
name TEXT NOT NULL,
tag_group_id INTEGER
);
CREATE TABLE topic_tags (
topic_id INTEGER NOT NULL,
tag_id INTEGER NOT NULL,
PRIMARY KEY (topic_id, tag_id)
);
CREATE TABLE topic_users (
user_id INTEGER NOT NULL,
topic_id INTEGER NOT NULL,
last_read_post_number INTEGER,
last_visited_at DATETIME,
first_visited_at DATETIME,
notification_level INTEGER NOT NULL,
notifications_changed_at DATETIME,
notifications_reason_id INTEGER,
total_msecs_viewed INTEGER NOT NULL,
PRIMARY KEY (user_id, topic_id)
);
CREATE TABLE topics (
id INTEGER NOT NULL PRIMARY KEY,
title TEXT NOT NULL,
created_at DATETIME NOT NULL,
views INTEGER NOT NULL,
user_id INTEGER,
category_id INTEGER,
visible BOOLEAN NOT NULL,
closed BOOLEAN NOT NULL,
archived BOOLEAN NOT NULL,
pinned_at DATETIME,
subtype TEXT,
pinned_globally BOOLEAN NOT NULL,
pinned_until DATETIME,
old_relative_url TEXT,
private_message TEXT
);
CREATE TABLE uploads (
id INTEGER NOT NULL PRIMARY KEY,
user_id INTEGER NOT NULL,
filename TEXT NOT NULL,
relative_path TEXT,
type TEXT,
data BLOB
);
CREATE TABLE user_badges (
badge_id INTEGER NOT NULL,
user_id INTEGER NOT NULL,
granted_at DATETIME NOT NULL
);
CREATE TABLE user_field_values (
user_id INTEGER NOT NULL,
field_id INTEGER NOT NULL,
is_multiselect_field BOOLEAN NOT NULL,
value TEXT
);
CREATE UNIQUE INDEX user_field_values_multiselect ON user_field_values (user_id, field_id, value) WHERE is_multiselect_field = TRUE;
CREATE UNIQUE INDEX user_field_values_not_multiselect ON user_field_values (user_id, field_id) WHERE is_multiselect_field = FALSE;
CREATE TABLE user_fields (
id INTEGER NOT NULL PRIMARY KEY,
name TEXT NOT NULL,
field_type TEXT NOT NULL,
editable BOOLEAN NOT NULL,
description TEXT NOT NULL,
required BOOLEAN NOT NULL,
show_on_profile BOOLEAN NOT NULL,
position INTEGER,
show_on_user_card BOOLEAN NOT NULL,
searchable BOOLEAN NOT NULL,
options JSON_TEXT
);
CREATE TABLE users (
id INTEGER NOT NULL PRIMARY KEY,
username TEXT NOT NULL,
created_at DATETIME,
name TEXT,
last_seen_at DATETIME,
admin BOOLEAN NOT NULL,
trust_level INTEGER,
approved BOOLEAN NOT NULL,
approved_at DATETIME,
date_of_birth DATE,
moderator BOOLEAN,
registration_ip_address TEXT,
staged BOOLEAN,
email TEXT,
avatar_path TEXT,
avatar_url TEXT,
avatar_upload_id TEXT,
bio TEXT,
password TEXT,
suspension TEXT,
location TEXT,
website TEXT,
old_relative_url TEXT,
sso_record TEXT,
anonymized BOOLEAN,
original_username TEXT,
timezone TEXT,
email_level INTEGER,
email_messages_level INTEGER,
email_digests BOOLEAN
);
/*
Core table columns implicitly excluded from the generated schema above via the `include` configuration option
in `schema.yml`. This serves as an inventory of these columns, allowing new core additions to be tracked and,
if necessary, synchronized with the intermediate database schema.
Table: groups
--------------
created_at datetime false
automatic boolean false
user_count integer false
automatic_membership_email_domains text true
primary_group boolean false
title string true
grant_trust_level integer true
incoming_email string true
has_messages boolean false
flair_bg_color string true
flair_color string true
bio_raw text true
bio_cooked text true
allow_membership_requests boolean false
default_notification_level integer false
public_exit boolean false
public_admission boolean false
membership_request_template text true
smtp_server string true
smtp_port integer true
smtp_ssl boolean true
imap_server string true
imap_port integer true
imap_ssl boolean true
imap_mailbox_name string false
imap_uid_validity integer false
imap_last_uid integer false
email_username string true
email_password string true
publish_read_state boolean false
imap_last_error text true
imap_old_emails integer true
imap_new_emails integer true
flair_icon string true
flair_upload_id integer true
allow_unknown_sender_topic_replies boolean false
smtp_enabled boolean true
smtp_updated_at datetime true
smtp_updated_by_id integer true
imap_enabled boolean true
imap_updated_at datetime true
imap_updated_by_id integer true
email_from_alias string true
Table: site_settings
---------------------
id integer false
data_type integer false
created_at datetime false
Table: tag_groups
------------------
created_at datetime false
parent_tag_id integer true
one_per_topic boolean true
Table: tags
------------
created_at datetime false
pm_topic_count integer false
target_tag_id integer true
description string true
public_topic_count integer false
staff_topic_count integer false
Table: user_badges
-------------------
id integer false
granted_by_id integer false
post_id integer true
notification_id integer true
seq integer false
featured_rank integer true
created_at datetime false
is_favorite boolean true
*/

@ -0,0 +1,22 @@
## Gemfiles for migrations-tooling
This directory contains Gemfiles for the migration related tools.
These tools use `bundler/inline`, so the Gemfiles aren't strictly needed. However, we use GitHub's Dependabot to keep the
dependencies up-to-date, and it requires a Gemfile to work. It's also easier to test the tools with a Gemfile.
When you add a new Gemfile, please add an entry in the `.github/workflows/dependabot.yml` file to enable Dependabot for it.
#### Example
```yaml
- package-ecosystem: "bundler"
directory: "migrations/config/gemfiles/convert"
schedule:
interval: "weekly"
day: "wednesday"
time: "10:00"
timezone: "Europe/Vienna"
versioning-strategy: "increase"
```
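
For reference, here is a minimal sketch of how the tooling consumes these Gemfiles at runtime (modeled on `load_gemfiles` in `migrations/lib/migrations.rb`; the path below is just an example):

```ruby
require "bundler/inline"

# Example path; load_gemfiles resolves these relative to migrations/config/gemfiles.
path = "migrations/config/gemfiles/convert/Gemfile"

gemfile(true) do
  # Evaluate the Gemfile's content inside the inline gemfile block.
  # rubocop:disable Security/Eval
  eval(File.read(path), nil, path, 1)
  # rubocop:enable Security/Eval
end
```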

@ -0,0 +1,26 @@
# frozen_string_literal: true
source "https://rubygems.org"
# the minimal Ruby version required by migrations-tooling
ruby ">= 3.2.2"
# `activesupport` gem needs to be in sync with the Rails version of Discourse, see `/Gemfile`
gem "activesupport", "< 7.1", require: "active_support"
# for SQLite
gem "extralite-bundle",
"~> 2.8",
require: "extralite",
github: "digital-fabric/extralite"
gem "lru_redux", "~> 1.1", require: false
# for communication between process forks
gem "msgpack", "~> 1.7"
# for CLI
gem "colored2", "~> 4.0"
gem "thor", "~> 1.3"
# auto-loading
gem "zeitwerk", "~> 2.6"

@ -0,0 +1,50 @@
## Configuration options for the base intermediate schema generator
##
## After modifying this file, regenerate the base intermediate schema
## by running the `generate_schema` script.
# Default relative path for generated base schema file.
# An absolute path can also be provided to the script as the first CLI argument.
# If the CLI argument is present, it takes precedence over the value specified here.
output_file_path: "../db/schema/100-base-schema.sql"
## Tables to include in the generated base intermediate schema.
##
## Available table options:
## virtual: Boolean. Enables the inclusion of a table in the schema based solely
## on the provided configuration. A virtual table does not need to be available in the core schema.
## ignore: List of columns to ignore. Convenient if most of the table's columns are needed.
## Usage is mutually exclusive with the `include` option. Only one should be used at a time.
## include: List of columns to include. Convenient if only a few columns are needed.
## Usage is mutually exclusive with the `ignore` option. Only one should be used at a time.
## primary_key: Literal or list of columns to use as primary key.
## extend: List of objects describing columns to be added/extended.
## The following options are available for an "extend" object:
## name: Required. The name of the column being extended.
## is_null: Specifies if the column can be null.
## type: Column type. Defaults to TEXT.
## indexes: List of indexes to create. The following options are available for an "index" object:
## name: Index name.
## columns: List of column(s) to index.
tables:
users:
ignore:
- flag_level
- last_emailed_at
- last_posted_at
- last_seen_reviewable_id
- password_algorithm
- password_hash
- salt
- secure_identifier
- seen_notification_id
- username_lower
## Schema-wide column configuration options. These options apply to all tables.
## See table specific column configuration options above.
##
## Available Options:
## ignore: List of core/plugin table columns to ignore and exclude from intermediate schema.
columns:
ignore:
- updated_at
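
For illustration, a hypothetical table entry (table and column names invented) combining the options described above:

```yaml
tables:
  example_notes:
    virtual: true
    primary_key: id
    extend:
      - name: id
        type: INTEGER
        is_null: false
      - name: note
    indexes:
      - name: example_notes_by_note
        columns: ["note"]
```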

@ -1,2 +0,0 @@
#!/usr/bin/env ruby
# frozen_string_literal: true

migrations/db/README.md (new file)

@ -0,0 +1,5 @@
# Intermediate Database
TODO: Document stuff here.
## Database Schema

@ -0,0 +1,6 @@
CREATE TABLE schema_migrations
(
path TEXT NOT NULL PRIMARY KEY,
created_at DATETIME NOT NULL,
sql_hash TEXT NOT NULL
);

@ -0,0 +1,5 @@
CREATE TABLE config
(
name TEXT NOT NULL PRIMARY KEY,
value TEXT NOT NULL
);

@ -0,0 +1,8 @@
CREATE TABLE log_entries
(
created_at DATETIME NOT NULL,
type TEXT NOT NULL,
message TEXT NOT NULL,
exception TEXT,
details TEXT
);

@ -0,0 +1,37 @@
/*
This file is auto-generated from the Discourse core database schema. Instead of editing it directly,
please update the `schema.yml` configuration file and re-run the `generate_schema` script to update it.
*/
CREATE TABLE users
(
id INTEGER NOT NULL PRIMARY KEY,
active BOOLEAN NOT NULL,
admin BOOLEAN NOT NULL,
approved BOOLEAN NOT NULL,
created_at DATETIME NOT NULL,
staged BOOLEAN NOT NULL,
trust_level INTEGER NOT NULL,
username TEXT NOT NULL,
views INTEGER NOT NULL,
approved_at DATETIME,
approved_by_id INTEGER,
date_of_birth DATE,
first_seen_at DATETIME,
flair_group_id INTEGER,
group_locked_trust_level INTEGER,
ip_address TEXT,
last_seen_at DATETIME,
locale TEXT,
manual_locked_trust_level INTEGER,
moderator BOOLEAN,
name TEXT,
previous_visit_at DATETIME,
primary_group_id INTEGER,
registration_ip_address TEXT,
silenced_till DATETIME,
suspended_at DATETIME,
suspended_till DATETIME,
title TEXT,
uploaded_avatar_id INTEGER
);

@ -0,0 +1,121 @@
# frozen_string_literal: true
require "extralite"
require "lru_redux"
module Migrations
class IntermediateDatabase
DEFAULT_JOURNAL_MODE = "wal"
TRANSACTION_BATCH_SIZE = 1000
PREPARED_STATEMENT_CACHE_SIZE = 5
def self.create_connection(path:, journal_mode: DEFAULT_JOURNAL_MODE)
db = ::Extralite::Database.new(path)
db.pragma(
busy_timeout: 60_000, # 60 seconds
journal_mode: journal_mode,
synchronous: "off",
temp_store: "memory",
locking_mode: journal_mode == "wal" ? "normal" : "exclusive",
cache_size: -10_000, # 10_000 pages
)
db
end
def self.connect(path:, journal_mode: DEFAULT_JOURNAL_MODE)
db = new(path: path, journal_mode: journal_mode)
yield(db)
ensure
db.close if db
end
attr_reader :connection
attr_reader :path
def initialize(path:, journal_mode: DEFAULT_JOURNAL_MODE)
@path = path
@journal_mode = journal_mode
@connection = self.class.create_connection(path: path, journal_mode: journal_mode)
@statement_counter = 0
# don't cache too many prepared statements
@statement_cache = PreparedStatementCache.new(PREPARED_STATEMENT_CACHE_SIZE)
end
def close
if @connection
commit_transaction
@statement_cache.clear
@connection.close
end
@connection = nil
@statement_counter = 0
end
def reconnect
close
@connection = self.class.create_connection(path: @path, journal_mode: @journal_mode)
end
def copy_from(source_db_paths)
commit_transaction
@statement_counter = 0
table_names = get_table_names
insert_actions = { "config" => "OR REPLACE", "uploads" => "OR IGNORE" }
source_db_paths.each do |source_db_path|
@connection.execute("ATTACH DATABASE ? AS source", source_db_path)
table_names.each do |table_name|
or_action = insert_actions[table_name] || ""
@connection.execute(
"INSERT #{or_action} INTO #{table_name} SELECT * FROM source.#{table_name}",
)
end
@connection.execute("DETACH DATABASE source")
end
end
def begin_transaction
return if @connection.transaction_active?
@connection.execute("BEGIN DEFERRED TRANSACTION")
end
def commit_transaction
return unless @connection.transaction_active?
@connection.execute("COMMIT")
end
private
def insert(sql, *parameters)
begin_transaction if @statement_counter == 0
stmt = @statement_cache.getset(sql) { @connection.prepare(sql) }
stmt.execute(*parameters)
if (@statement_counter += 1) > TRANSACTION_BATCH_SIZE
commit_transaction
@statement_counter = 0
end
end
def iso8601(column_name, alias_name = nil)
alias_name ||= column_name.split(".").last
"strftime('%Y-%m-%dT%H:%M:%SZ', #{column_name}) AS #{alias_name}"
end
def get_table_names
@connection.query_splat(<<~SQL)
SELECT name
FROM sqlite_schema
WHERE type = 'table'
AND name NOT LIKE 'sqlite_%'
AND name NOT IN ('schema_migrations', 'config')
SQL
end
end
end
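
A hedged usage sketch (paths invented, environment assumed to be bootstrapped via `load_gemfiles`/`configure_zeitwerk`): `copy_from` merges the contents of several source databases into one, applying `INSERT OR REPLACE` for `config` and `INSERT OR IGNORE` for `uploads`:

```ruby
db = Migrations::IntermediateDatabase.new(path: "/tmp/intermediate.db")

# Merge two hypothetical per-worker databases into this one.
db.copy_from(%w[/tmp/intermediate-0.db /tmp/intermediate-1.db])

db.close
```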

@ -0,0 +1,58 @@
# frozen_string_literal: true
module Migrations
class IntermediateDatabaseMigrator
class << self
def reset!(path)
[path, "#{path}-wal", "#{path}-shm"].each { |p| FileUtils.rm_f(p) if File.exist?(p) }
end
def migrate(path)
connection = IntermediateDatabase.create_connection(path: path)
performed_migrations = find_performed_migrations(connection)
path = File.join(::Migrations.root_path, "db", "schema")
migrate_from_path(connection, path, performed_migrations)
connection.close
end
private
def new_database?(connection)
connection.query_single_splat(<<~SQL) == 0
SELECT COUNT(*)
FROM sqlite_schema
WHERE type = 'table' AND name = 'schema_migrations'
SQL
end
def find_performed_migrations(connection)
return Set.new if new_database?(connection)
connection.query_splat(<<~SQL).to_set
SELECT path
FROM schema_migrations
SQL
end
def migrate_from_path(connection, migration_path, performed_migrations)
file_pattern = File.join(migration_path, "*.sql")
Dir[file_pattern].sort.each do |path|
relative_path = Pathname(path).relative_path_from(Migrations.root_path).to_s
unless performed_migrations.include?(relative_path)
sql = File.read(path)
sql_hash = Digest::SHA1.hexdigest(sql)
connection.execute(sql)
connection.execute(<<~SQL, path: relative_path, sql_hash: sql_hash)
INSERT INTO schema_migrations (path, created_at, sql_hash)
VALUES (:path, datetime('now'), :sql_hash)
SQL
end
end
end
end
end
end
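
This is the same sequence `bin/convert execute` runs: delete the database files (including the WAL/SHM sidecars) and apply any pending `db/schema/*.sql` migrations, recording each path and SQL hash in `schema_migrations`:

```ruby
path = "/tmp/converter/intermediate.db"
::Migrations::IntermediateDatabaseMigrator.reset!(path)
::Migrations::IntermediateDatabaseMigrator.migrate(path)
```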

@ -0,0 +1,25 @@
# frozen_string_literal: true
module Migrations
class PreparedStatementCache < ::LruRedux::Cache
class PreparedStatementHash < Hash
def shift
result = super
if (stmt = result[1])
stmt.close
end
result
end
def clear
each_value(&:close)
super
end
end
def initialize(*args)
super
@data = PreparedStatementHash.new
end
end
end
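
A small sketch of the eviction behaviour (assuming `db` is an open `Extralite::Database`): LruRedux evicts the least recently used entry via `shift` once the cache is full, and the override above closes the displaced prepared statement:

```ruby
cache = Migrations::PreparedStatementCache.new(2)

cache.getset("SELECT 1") { db.prepare("SELECT 1") }
cache.getset("SELECT 2") { db.prepare("SELECT 2") }

# A third statement exceeds the capacity of 2: "SELECT 1" is evicted
# and its prepared statement is closed automatically.
cache.getset("SELECT 3") { db.prepare("SELECT 3") }
```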

@ -0,0 +1,7 @@
# frozen_string_literal: true
gemfile do
source "https://rubygems.org"
gem "hashids"
end

@ -0,0 +1,61 @@
# frozen_string_literal: true
require "bundler/inline"
require "bundler/ui"
module Migrations
def self.root_path
@root_path ||= File.expand_path("..", __dir__)
end
def self.load_gemfiles(*relative_paths)
gemfiles_root_path = File.join(Migrations.root_path, "config/gemfiles")
relative_paths.each do |relative_path|
path = File.join(File.expand_path(relative_path, gemfiles_root_path), "Gemfile")
unless File.exist?(path)
warn "Could not find Gemfile at #{path}"
exit 1
end
gemfile_content = File.read(path)
# Create new UI and set level to confirm to avoid printing unnecessary messages
bundler_ui = Bundler::UI::Shell.new
bundler_ui.level = "confirm"
begin
gemfile(true, ui: bundler_ui) do
# rubocop:disable Security/Eval
eval(gemfile_content, nil, path, 1)
# rubocop:enable Security/Eval
end
rescue Bundler::BundlerError => e
warn "\e[31m#{e.message}\e[0m"
exit 1
end
end
end
def self.load_rails_environment(quiet: false)
puts "Loading application..." unless quiet
rails_root = File.expand_path("../..", __dir__)
# rubocop:disable Discourse/NoChdir
Dir.chdir(rails_root) { require File.join(rails_root, "config/environment") }
# rubocop:enable Discourse/NoChdir
end
def self.configure_zeitwerk(*directories)
require "zeitwerk"
root_path = Migrations.root_path
loader = Zeitwerk::Loader.new
directories.each do |dir|
loader.push_dir(File.expand_path(dir, root_path), namespace: Migrations)
end
loader.setup
end
end

@ -0,0 +1,203 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
require "bundler/inline"
require "benchmark"
require "tempfile"
gemfile(true) do
source "https://rubygems.org"
gem "extralite-bundle", require: "extralite"
gem "sqlite3"
gem "duckdb"
end
ROW_COUNT = 50_000_000
SOME_DATA = ["The quick, brown fox jumps over a lazy dog.", 1_234_567_890]
def with_db_path
tempfile = Tempfile.new
yield tempfile.path
ensure
tempfile.close
tempfile.unlink
end
module Sqlite
TRANSACTION_SIZE = 1000
CREATE_TABLE_SQL = <<~SQL
CREATE TABLE foo
(
id INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT,
some_text TEXT,
some_number INTEGER
)
SQL
INSERT_SQL = "INSERT INTO foo (some_text, some_number) VALUES (?, ?)"
class Sqlite3Benchmark
def initialize(row_count)
@row_count = row_count
@tempfile = Tempfile.new
@connection = SQLite3::Database.new(@tempfile.path)
@connection.journal_mode = "wal"
@connection.synchronous = "off"
@connection.temp_store = "memory"
@connection.locking_mode = "normal"
@connection.cache_size = -10_000 # 10_000 pages
@connection.execute(CREATE_TABLE_SQL)
@stmt = @connection.prepare(INSERT_SQL)
@statement_counter = 0
end
def run
@row_count.times { insert(SOME_DATA) }
close
end
private
def insert(*parameters)
begin_transaction if @statement_counter == 0
@stmt.execute(*parameters)
if (@statement_counter += 1) > TRANSACTION_SIZE
commit_transaction
@statement_counter = 0
end
end
def begin_transaction
return if @connection.transaction_active?
@connection.transaction(:deferred)
end
def commit_transaction
return unless @connection.transaction_active?
@connection.commit
end
def close
commit_transaction
@stmt.close
@connection.close
@tempfile.close
@tempfile.unlink
end
end
class ExtraliteBenchmark
def initialize(row_count)
@row_count = row_count
@tempfile = Tempfile.new
@connection = Extralite::Database.new(@tempfile.path)
@connection.pragma(
journal_mode: "wal",
synchronous: "off",
temp_store: "memory",
locking_mode: "normal",
cache_size: -10_000, # 10_000 pages
)
@connection.execute(CREATE_TABLE_SQL)
@stmt = @connection.prepare(INSERT_SQL)
@statement_counter = 0
end
def run
@row_count.times { insert(SOME_DATA) }
close
end
private
def insert(*parameters)
begin_transaction if @statement_counter == 0
@stmt.execute(*parameters)
if (@statement_counter += 1) > TRANSACTION_SIZE
commit_transaction
@statement_counter = 0
end
end
def begin_transaction
return if @connection.transaction_active?
@connection.execute("BEGIN DEFERRED TRANSACTION")
end
def commit_transaction
return unless @connection.transaction_active?
@connection.execute("COMMIT")
end
def close
commit_transaction
@stmt.close
@connection.close
@tempfile.close
@tempfile.unlink
end
end
end
class DuckDbBenchmark
CREATE_TABLE_SQL = <<~SQL
CREATE TABLE foo
(
id INTEGER NOT NULL PRIMARY KEY,
some_text TEXT,
some_number INTEGER
)
SQL
def initialize(row_count)
@row_count = row_count
@tempfile = Tempfile.new
FileUtils.rm(@tempfile.path)
@db = DuckDB::Database.open(@tempfile.path)
@connection = @db.connect
@connection.query(CREATE_TABLE_SQL)
@appender = @connection.appender("foo")
end
def run
@row_count.times do |id|
@appender.begin_row
@appender.append(id)
@appender.append(SOME_DATA[0])
@appender.append(SOME_DATA[1])
@appender.end_row
end
close
end
private
def close
@appender.close
@connection.close
@db.close
end
end
Benchmark.bm(15) do |x|
x.report("SQLite3") { Sqlite::Sqlite3Benchmark.new(ROW_COUNT).run }
x.report("Extralite") { Sqlite::ExtraliteBenchmark.new(ROW_COUNT).run }
x.report("DuckDB") { DuckDbBenchmark.new(ROW_COUNT).run }
end

@ -0,0 +1,114 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
require "bundler/inline"
gemfile(true) do
source "https://rubygems.org"
gem "benchmark-ips"
gem "extralite-bundle", github: "digital-fabric/extralite"
gem "sqlite3"
end
require "extralite"
require "benchmark/ips"
require "time"
require "securerandom"
User = Data.define(:id, :name, :email, :created_at)
USER_HASH =
begin
name = SecureRandom.hex(10)
{ id: 1, name: name, email: "#{name}@example.com", created_at: Time.now.utc.iso8601 }
end
USER_DATA =
User.new(
id: USER_HASH[:id],
name: USER_HASH[:name],
email: USER_HASH[:email],
created_at: USER_HASH[:created_at],
)
SQL_TABLE = <<~SQL
CREATE TABLE users (
id INTEGER,
name TEXT,
email TEXT,
created_at DATETIME
)
SQL
SQL_INSERT = "INSERT INTO users VALUES (?, ?, ?, ?)"
SQL_INSERT_NAMED = "INSERT INTO users VALUES (:id, :name, :email, :created_at)"
def create_extralite_db
db = Extralite::Database.new(":memory:")
db.execute(SQL_TABLE)
db
end
def create_sqlite3_db
db = SQLite3::Database.new(":memory:")
db.execute(SQL_TABLE)
db
end
def create_users(row_count)
row_count.times.map do |id|
name = SecureRandom.hex(10)
{ id: id, name: name, email: "#{name}@example.com", created_at: Time.now.utc.iso8601 }
end
end
def insert_extralite_regular(stmt, user)
stmt.execute(user.id, user.name, user.email, user.created_at)
end
def insert_extralite_hash(stmt, user)
stmt.execute(user)
end
def insert_extralite_data(stmt, user)
stmt.execute(user)
end
def insert_sqlite3_regular(stmt, user)
stmt.execute(user.id, user.name, user.email, user.created_at)
end
def insert_sqlite3_hash(stmt, user)
stmt.execute(user)
end
puts "",
"Extralite SQLite version: #{Extralite.sqlite3_version}",
"SQLite version: #{SQLite3::SQLITE_VERSION}",
""
extralite_db = create_extralite_db
extralite_stmt_regular = extralite_db.prepare(SQL_INSERT)
extralite_stmt_named = extralite_db.prepare(SQL_INSERT_NAMED)
sqlite3_db = create_sqlite3_db
sqlite3_stmt_regular = sqlite3_db.prepare(SQL_INSERT)
sqlite3_stmt_named = sqlite3_db.prepare(SQL_INSERT_NAMED)
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("Extralite regular") { insert_extralite_regular(extralite_stmt_regular, USER_DATA) }
x.report("Extralite hash") { insert_extralite_hash(extralite_stmt_named, USER_HASH) }
x.report("Extralite data") { insert_extralite_data(extralite_stmt_regular, USER_DATA) }
x.report("Extralite data/array") do
insert_extralite_data(extralite_stmt_regular, USER_DATA.deconstruct)
end
x.report("SQLite3 regular") { insert_sqlite3_regular(sqlite3_stmt_regular, USER_DATA) }
x.report("SQLite3 hash") { insert_sqlite3_hash(sqlite3_stmt_named, USER_HASH) }
x.report("SQLite3 data/hash") { insert_sqlite3_hash(sqlite3_stmt_named, USER_DATA.to_h) }
x.compare!
end
extralite_stmt_regular.close
extralite_stmt_named.close
extralite_db.close
sqlite3_stmt_regular.close
sqlite3_stmt_named.close
sqlite3_db.close

@ -0,0 +1,105 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
require "bundler/inline"
gemfile(true) do
source "https://rubygems.org"
gem "benchmark-ips"
gem "extralite-bundle", github: "digital-fabric/extralite"
gem "sqlite3"
end
require "extralite"
require "benchmark/ips"
require "time"
require "securerandom"
SQL_TABLE = <<~SQL
CREATE TABLE users (
id INTEGER,
name TEXT,
email TEXT,
created_at DATETIME
)
SQL
SQL_INSERT = "INSERT INTO users VALUES (?, ?, ?, ?)"
SQL_INSERT_NAMED = "INSERT INTO users VALUES (:id, :name, :email, :created_at)"
def create_extralite_db
db = Extralite::Database.new(":memory:")
db.execute(SQL_TABLE)
db
end
def create_sqlite3_db
db = SQLite3::Database.new(":memory:")
db.execute(SQL_TABLE)
db
end
def create_users(row_count)
row_count.times.map do |id|
name = SecureRandom.hex(10)
{ id: id, name: name, email: "#{name}@example.com", created_at: Time.now.utc.iso8601 }
end
end
def insert_extralite_regular(stmt, users)
users.each { |user| stmt.execute(user[:id], user[:name], user[:email], user[:created_at]) }
end
def insert_extralite_index(stmt, users)
users.each { |user| stmt.execute(user) }
end
def insert_extralite_named(stmt, users)
users.each { |user| stmt.execute(user) }
end
def insert_sqlite3_regular(stmt, users)
users.each { |user| stmt.execute(user[:id], user[:name], user[:email], user[:created_at]) }
end
def insert_sqlite3_named(stmt, users)
users.each { |user| stmt.execute(user) }
end
puts "",
"Extralite SQLite version: #{Extralite.sqlite3_version}",
"SQLite version: #{SQLite3::SQLITE_VERSION}",
""
extralite_db = create_extralite_db
extralite_stmt_regular = extralite_db.prepare(SQL_INSERT)
extralite_stmt_named = extralite_db.prepare(SQL_INSERT_NAMED)
sqlite3_db = create_sqlite3_db
sqlite3_stmt_regular = sqlite3_db.prepare(SQL_INSERT)
sqlite3_stmt_named = sqlite3_db.prepare(SQL_INSERT_NAMED)
users = create_users(1_000)
users_indexed =
users.map do |user|
{ 1 => user[:id], 2 => user[:name], 3 => user[:email], 4 => user[:created_at] }
end
users_array = users.map { |user| [user[:id], user[:name], user[:email], user[:created_at]] }
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("Extralite regular") { insert_extralite_regular(extralite_stmt_regular, users) }
x.report("Extralite named") { insert_extralite_named(extralite_stmt_named, users) }
x.report("Extralite index") { insert_extralite_index(extralite_stmt_regular, users_indexed) }
x.report("Extralite array") { insert_extralite_index(extralite_stmt_regular, users_array) }
x.report("SQLite3 regular") { insert_sqlite3_regular(sqlite3_stmt_regular, users) }
x.report("SQLite3 named") { insert_sqlite3_named(sqlite3_stmt_named, users) }
x.compare!
end
extralite_stmt_regular.close
extralite_stmt_named.close
extralite_db.close
sqlite3_stmt_regular.close
sqlite3_stmt_named.close
sqlite3_db.close

@ -0,0 +1,25 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
require "bundler/inline"
gemfile(true) do
source "https://rubygems.org"
gem "benchmark-ips"
end
require "benchmark/ips"
require "time"
THE_TIME = Time.now.utc
DATE_TIME = DateTime.now.new_offset(0)
Benchmark.ips do |x|
x.config(time: 10, warmup: 2)
x.report("Time#iso8601") { THE_TIME.iso8601 }
x.report("Time#strftime") { THE_TIME.strftime("%FT%TZ") }
x.report("DateTime#iso8601") { DATE_TIME.iso8601 }
x.compare!
end

@ -0,0 +1,186 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
require "bundler/inline"
gemfile(true) do
source "https://rubygems.org"
gem "extralite-bundle", github: "digital-fabric/extralite"
end
require "etc"
require "extralite"
require "tempfile"
SQL_TABLE = <<~SQL
CREATE TABLE users (
id INTEGER,
name TEXT,
email TEXT,
created_at DATETIME
)
SQL
SQL_INSERT = "INSERT INTO users VALUES (?, ?, ?, ?)"
USER = [1, "John", "john@example.com", "2023-12-29T11:10:04Z"]
ROW_COUNT = Etc.nprocessors * 200_000
def create_extralite_db(path, initialize: false)
db = Extralite::Database.new(path)
db.pragma(
busy_timeout: 60_000, # 60 seconds
journal_mode: "wal",
synchronous: "off",
)
db.execute(SQL_TABLE) if initialize
db
end
def with_db_path
tempfile = Tempfile.new
db = create_extralite_db(tempfile.path, initialize: true)
db.close
yield tempfile.path
db = create_extralite_db(tempfile.path)
row_count = db.query_single_value("SELECT COUNT(*) FROM users")
puts "Row count: #{row_count}" if row_count != ROW_COUNT
db.close
ensure
tempfile.close
tempfile.unlink
end
class SingleWriter
def initialize(db_path, row_count)
@row_count = row_count
@db = create_extralite_db(db_path)
@stmt = @db.prepare(SQL_INSERT)
end
def write
@row_count.times { @stmt.execute(USER) }
@stmt.close
@db.close
end
end
class ForkedSameDbWriter
def initialize(db_path, row_count)
@row_count = row_count
@db_path = db_path
@pids = []
setup_forks
end
def setup_forks
fork_count = Etc.nprocessors
split_row_count = @row_count / fork_count
fork_count.times do
@pids << fork do
db = create_extralite_db(@db_path)
stmt = db.prepare(SQL_INSERT)
Signal.trap("USR1") do
split_row_count.times { stmt.execute(USER) }
stmt.close
db.close
exit
end
sleep
end
end
sleep(1)
end
def write
@pids.each { |pid| Process.kill("USR1", pid) }
Process.waitall
end
end
class ForkedMultiDbWriter
def initialize(db_path, row_count)
@row_count = row_count
@complete_db_path = db_path
@pids = []
@db_paths = []
@db = create_extralite_db(db_path)
setup_forks
end
def setup_forks
fork_count = Etc.nprocessors
split_row_count = @row_count / fork_count
fork_count.times do |i|
db_path = "#{@complete_db_path}-#{i}"
@db_paths << db_path
@pids << fork do
db = create_extralite_db(db_path, initialize: true)
stmt = db.prepare(SQL_INSERT)
Signal.trap("USR1") do
split_row_count.times { stmt.execute(USER) }
stmt.close
db.close
exit
end
sleep
end
end
sleep(2)
end
def write
@pids.each { |pid| Process.kill("USR1", pid) }
Process.waitall
@db_paths.each do |db_path|
@db.execute("ATTACH DATABASE ? AS db", db_path)
@db.execute("INSERT INTO users SELECT * FROM db.users")
@db.execute("DETACH DATABASE db")
end
@db.close
end
end
LABEL_WIDTH = 25
def benchmark(label, label_width = 15)
print "#{label} ..."
label = label.ljust(label_width)
start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
yield
finish = Process.clock_gettime(Process::CLOCK_MONOTONIC)
time_diff = sprintf("%.4f", finish - start).rjust(9)
print "\r#{label} #{time_diff} seconds\n"
end
puts "", "Benchmarking write performance", ""
with_db_path do |db_path|
single_writer = SingleWriter.new(db_path, ROW_COUNT)
benchmark("single writer", LABEL_WIDTH) { single_writer.write }
end
with_db_path do |db_path|
forked_same_db_writer = ForkedSameDbWriter.new(db_path, ROW_COUNT)
benchmark("forked writer - same DB", LABEL_WIDTH) { forked_same_db_writer.write }
end
with_db_path do |db_path|
forked_multi_db_writer = ForkedMultiDbWriter.new(db_path, ROW_COUNT)
benchmark("forked writer - multi DB", LABEL_WIDTH) { forked_multi_db_writer.write }
end

@ -7,13 +7,15 @@
# It accepts an optional command line argument for the output file path which
# overrides the path configured in schema.yml
puts "Loading application..."
require_relative "../../config/environment"
require_relative "../lib/migrations"
module Migrations
load_rails_environment
load_gemfiles("common")
class SchemaGenerator
def initialize(opts = {})
config = YAML.load_file(File.join(__dir__, "schema.yml"), symbolize_names: true)
config = load_config
@core_db_connection = ActiveRecord::Base.connection
@output_stream = StringIO.new
@ -25,7 +27,7 @@ module Migrations
@column_configs = config[:columns]
@configured_table_names = @table_configs&.keys&.sort || []
@global_column_ignore_list = @column_configs[:ignore] || []
@global_column_ignore_list = @column_configs&.fetch(:ignore) || []
end
def run
@ -35,13 +37,21 @@ module Migrations
generate_tables
generate_indirectly_ignored_columns_log
generate_migration_file
validate_migration_file
puts "", "Done"
end
private
def load_config
path = File.expand_path("../config/intermediate_db.yml", __dir__)
YAML.load_file(path, symbolize_names: true)
end
def generate_header
return if @configured_table_names.empty?
@output_stream.puts <<~HEADER
/*
This file is auto-generated from the Discourse core database schema. Instead of editing it directly,
@ -61,6 +71,8 @@ module Migrations
end
def generate_indirectly_ignored_columns_log
return if @indirectly_ignored_columns.empty?
puts "Generating indirectly ignored column list..."
@output_stream.puts "\n\n/*"
@ -142,7 +154,8 @@ module Migrations
end
@output_stream.puts ""
@output_stream.puts "CREATE TABLE #{name} ("
@output_stream.puts "CREATE TABLE #{name}"
@output_stream.puts "("
if !composite_key && primary_key.present?
primary_key_column = column_records.find { |c| c.name == primary_key }
@ -166,7 +179,9 @@ module Migrations
column_definitions << generate_column_definition(column)
end
column_definitions << " PRIMARY KEY (#{primary_key.join(", ")})" if composite_key
format_columns!(column_definitions)
column_definitions << " PRIMARY KEY (#{primary_key.join(", ")})" if composite_key
@output_stream.puts column_definitions.join(",\n")
@output_stream.puts ");"
@ -175,6 +190,36 @@ module Migrations
indexes.each { |index| generate_index(name, index) }
end
def validate_migration_file
db = Extralite::Database.new(":memory:")
if (sql = @output_stream.string).blank?
warn "No SQL generated, skipping validation".red
else
db.execute(sql)
end
ensure
db.close if db
end
def format_columns!(column_definitions)
column_definitions.map! do |c|
c.match(
/^\s*(?<name>\w+)\s(?<datatype>\w+)\s?(?<nullable>NOT NULL)?\s?(?<primary_key>PRIMARY KEY)?/,
).named_captures
end
max_name_length = column_definitions.map { |c| c["name"].length }.max
max_datatype_length = column_definitions.map { |c| c["datatype"].length }.max
column_definitions.sort_by! do |c|
[c["primary_key"] ? 0 : 1, c["nullable"] ? 0 : 1, c["name"]]
end
column_definitions.map! do |c|
" #{c["name"].ljust(max_name_length)} #{c["datatype"].ljust(max_datatype_length)} #{c["nullable"]} #{c["primary_key"]}".rstrip
end
end
class CustomColumn
attr_reader :name
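
To illustrate the new sorting/formatting, a self-contained sketch of what `format_columns!` produces for a few sample definitions (input invented): the primary key sorts first, then NOT NULL columns, then nullable ones, with names and datatypes padded into aligned columns:

```ruby
column_definitions = [
  "  name TEXT NOT NULL",
  "  description TEXT",
  "  id INTEGER NOT NULL PRIMARY KEY",
]

# Parse each definition into name/datatype/nullability/primary key captures.
parsed =
  column_definitions.map do |c|
    c.match(
      /^\s*(?<name>\w+)\s(?<datatype>\w+)\s?(?<nullable>NOT NULL)?\s?(?<primary_key>PRIMARY KEY)?/,
    ).named_captures
  end

max_name = parsed.map { |c| c["name"].length }.max
max_type = parsed.map { |c| c["datatype"].length }.max

# Primary key first, then NOT NULL columns, then nullable ones, alphabetically.
parsed.sort_by! { |c| [c["primary_key"] ? 0 : 1, c["nullable"] ? 0 : 1, c["name"]] }

puts parsed.map { |c|
  "  #{c["name"].ljust(max_name)} #{c["datatype"].ljust(max_type)} #{c["nullable"]} #{c["primary_key"]}".rstrip
}
# Output:
#   id          INTEGER NOT NULL PRIMARY KEY
#   name        TEXT    NOT NULL
#   description TEXT
```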

@ -1,7 +1,11 @@
# frozen_string_literal: true
RSpec.describe "Migrations::Import" do
subject(:cli) { system("migrations/import", exception: true) }
subject(:cli) do
# rubocop:disable Discourse/NoChdir
Dir.chdir("migrations") { system("bin/import", exception: true) }
# rubocop:enable Discourse/NoChdir
end
it "works" do
expect { cli }.to output(

@ -0,0 +1,43 @@
# frozen_string_literal: true
require_relative "../../lib/migrations"
RSpec.describe Migrations do
describe ".root_path" do
it "returns the root path" do
expect(described_class.root_path).to eq(File.expand_path("../..", __dir__))
end
end
describe ".load_gemfiles" do
it "exits with error if the gemfile does not exist" do
relative_path = "does_not_exist"
expect { described_class.load_gemfiles(relative_path) }.to output(
include("Could not find Gemfile").and include(relative_path)
).to_stderr.and raise_error(SystemExit) { |error| expect(error.status).to eq(1) }
end
def with_temporary_root_path
Dir.mktmpdir do |temp_dir|
described_class.stubs(:root_path).returns(temp_dir)
yield temp_dir
end
end
it "exits with an error if the required Ruby version isn't found" do
with_temporary_root_path do |root_path|
gemfile_path = File.join(root_path, "config/gemfiles/test/Gemfile")
FileUtils.mkdir_p(File.dirname(gemfile_path))
File.write(gemfile_path, <<~GEMFILE)
source "http://localhost"
ruby "~> 100.0.0"
GEMFILE
expect { described_class.load_gemfiles("test") }.to output(
include("your Gemfile specified ~> 100.0.0"),
).to_stderr.and raise_error(SystemExit) { |error| expect(error.status).to eq(1) }
end
end
end
end

@ -0,0 +1,20 @@
# frozen_string_literal: true
# we need to require the rails_helper from core to load the Rails environment
require_relative "../../spec/rails_helper"
require "bundler/inline"
require "bundler/ui"
# this is a hack to allow us to load Gemfiles for converters
Dir[File.expand_path("../config/gemfiles/**/Gemfile", __dir__)].each do |path|
# Create new UI and set level to confirm to avoid printing unnecessary messages
bundler_ui = Bundler::UI::Shell.new
bundler_ui.level = "confirm"
gemfile(true, ui: bundler_ui) do
# rubocop:disable Security/Eval
eval(File.read(path), nil, path, 1)
# rubocop:enable Security/Eval
end
end