discourse/migrations/scripts/generate_schema
Gerhard Schlager 7c3a29c9d6
DEV: Add converter framework for migrations-tooling (#28540)
* Updates GitHub Actions
* Switches from `bundler/inline` to an optional group in the `Gemfile` because the previous solution didn't work well with rspec
* Adds the converter framework and tests
* Allows loading private converters (see README)
* Switches from multiple CLI tools to a single CLI
* Makes DB connections reusable and adds a new abstraction for the `IntermediateDB`
* `IntermediateDB` acts as an interface for IPC calls when a converter step runs in parallel (forks). Only the main process writes to the DB.
* Includes a simple example implementation of a converter for now.
2024-09-09 17:14:39 +02:00

308 lines
9.0 KiB
Ruby
Executable File

#!/usr/bin/env ruby
# frozen_string_literal: true
# Generate the converter's base intermediate database migration file from
# the core database state and the YAML configuration in
# `../config/intermediate_db.yml` (relative to this script).
# Invoke from core root directory as `./migrations/scripts/generate_schema`.
# It accepts an optional command line argument for the output file path which
# overrides the `output_file_path` configured in that YAML file.
require_relative "../lib/migrations"
module Migrations
# NOTE(review): defined outside this file (presumably by the required
# "../lib/migrations"); it appears to boot the Rails app so that the
# ActiveRecord and Discourse constants used below are available — confirm.
load_rails_environment
class SchemaGenerator
def initialize(opts = {})
config = load_config
@core_db_connection = ActiveRecord::Base.connection
@output_stream = StringIO.new
@indirectly_ignored_columns = Hash.new { |h, k| h[k] = [] }
@output_file_path = opts[:output_file_path] || config[:output_file_path]
@table_configs = config[:tables]
@column_configs = config[:columns]
@configured_table_names = @table_configs&.keys&.sort || []
@global_column_ignore_list = @column_configs&.fetch(:ignore) || []
end
# Runs the generation pipeline: header, table definitions, the inventory of
# implicitly excluded columns, writing the file and finally validating the
# generated SQL. Progress is reported on stdout.
def run
  puts "Generating base converter migration file for Discourse #{Discourse::VERSION::STRING}"

  # The steps are order-dependent: each one appends to (or consumes) the
  # shared output stream.
  %i[
    generate_header
    generate_tables
    generate_indirectly_ignored_columns_log
    generate_migration_file
    validate_migration_file
  ].each { |step| send(step) }

  puts
  puts "Done"
end
private
# Reads `../config/intermediate_db.yml` (resolved relative to this script's
# directory) and returns the parsed YAML with hash keys symbolized.
def load_config
  config_path = File.expand_path("../config/intermediate_db.yml", __dir__)
  YAML.load_file(config_path, symbolize_names: true)
end
# Writes the "auto-generated" notice to the output stream. Nothing is written
# when no tables are configured.
def generate_header
  unless @configured_table_names.empty?
    notice = <<~HEADER
      /*
      This file is auto-generated from the Discourse core database schema. Instead of editing it directly,
      please update the `schema.yml` configuration file and re-run the `generate_schema` script to update it.
      */
    HEADER

    @output_stream.puts notice
  end
end
# Generates the definition of every configured table, in the (sorted) order
# of `@configured_table_names`.
#
# Raises a RuntimeError for the first name that is neither a core table nor
# a configured virtual table.
def generate_tables
  puts "Generating tables..."

  @configured_table_names.each do |table_name|
    if valid_table?(table_name)
      generate_table(table_name)
    else
      raise "Core table named '#{table_name}' not found"
    end
  end
end
# Appends an SQL block comment listing, per table, the core columns that were
# left out because the table uses an `include` list. Each column is printed as
# "name type null". Does nothing when no such columns were recorded.
def generate_indirectly_ignored_columns_log
  return if @indirectly_ignored_columns.empty?

  puts "Generating indirectly ignored column list..."

  out = @output_stream
  out.puts "\n\n/*"
  # The trailing `\n` escape leaves a blank line between the note and the list.
  out.puts <<~NOTE
    Core table columns implicitly excluded from the generated schema above via the `include` configuration option
    in `schema.yml`. This serves as an inventory of these columns, allowing new core additions to be tracked and,
    if necessary, synchronized with the intermediate database schema.\n
  NOTE

  @indirectly_ignored_columns.each_with_index do |(table_name, columns), index|
    unless virtual_table?(table_name) || columns.blank?
      # Blank separator line before every entry except the one at index 0.
      out.puts "" if index.positive?
      out.puts "Table: #{table_name}", "--------#{"-" * table_name.length}"
      columns.each { |column| out.puts " #{column.name} #{column.type} #{column.null}" }
    end
  end

  out.puts "*/"
end
# Writes the accumulated output (minus one trailing line break) to the
# configured output file. A relative path is resolved against this script's
# directory.
def generate_migration_file
  file_path = File.expand_path(@output_file_path, __dir__)
  puts "Generating base migration file '#{file_path}'..."

  content = @output_stream.string.chomp
  File.open(file_path, "w") { |file| file << content }
end
# Returns the column definition fragment "name TYPE", with " NOT NULL"
# appended when the column does not allow NULL values.
def generate_column_definition(column)
  fragments = [" #{column.name} #{type(column)}"]
  fragments << " NOT NULL" unless column.null
  fragments.join
end
# Writes one `CREATE [UNIQUE] INDEX` statement for `table_name`.
#
# `index` is a hash with `:name` and `:columns`, plus optional `:unique` and
# `:condition`; a present condition is appended verbatim after the column list.
def generate_index(table_name, index)
  statement = +"CREATE "
  statement << "UNIQUE " if index[:unique]
  statement << "INDEX #{index[:name]} ON #{table_name} (#{index[:columns].join(", ")})"
  statement << " #{index[:condition]}" if index[:condition].present?

  @output_stream.puts "#{statement};"
end
# Returns `[mode, column_names]` for a table: `[:include, list]` when a
# non-empty `include` list is configured, otherwise `[:ignore, list]` with the
# configured `ignore` list (empty when none is configured).
def column_list_for(table_name)
  included = @table_configs.dig(table_name, :include) || []
  return :include, included if included.present?

  ignored = @table_configs.dig(table_name, :ignore) || []
  [:ignore, ignored]
end
# Writes the `CREATE TABLE` statement (and any configured indexes) for the
# table `name` to the output stream.
#
# Columns are filtered by the global ignore list and by the table's
# `include`/`ignore` configuration. Core columns dropped because they are
# missing from an `include` list are recorded in `@indirectly_ignored_columns`.
#
# Raises a RuntimeError when a configured primary key column does not exist.
def generate_table(name)
  puts "Generating #{name}..."

  column_definitions = []
  column_records = columns(name)
  mode, column_list = column_list_for(name)
  indexes = indexes(name)
  primary_key, composite_key = resolve_primary_key(name, column_records)

  @output_stream.puts ""
  @output_stream.puts "CREATE TABLE #{name}"
  @output_stream.puts "("

  # A single-column key is declared inline on its column, but only when that
  # column is part of the generated table.
  if !composite_key && primary_key.present?
    primary_key_column = column_records.find { |c| c.name == primary_key }

    if (mode == :include && column_list.include?(primary_key_column.name)) ||
         (mode == :ignore && !column_list.include?(primary_key_column.name))
      column_definitions << " #{primary_key_column.name} #{type(primary_key_column)} NOT NULL PRIMARY KEY"
    end
  end

  column_records.each do |column|
    next if @global_column_ignore_list.include?(column.name)
    next if (mode == :ignore) && column_list.include?(column.name)

    # Custom (extension) columns are always kept; core columns that are not
    # on the include list are only logged.
    if !column.is_a?(CustomColumn) && (mode == :include) && !column_list.include?(column.name)
      @indirectly_ignored_columns[name] << column
      next
    end

    # Already emitted above with the inline PRIMARY KEY clause.
    next if !composite_key && (column.name == primary_key)

    column_definitions << generate_column_definition(column)
  end

  format_columns!(column_definitions)
  # Added after formatting because it is a table constraint, not a column.
  column_definitions << " PRIMARY KEY (#{primary_key.join(", ")})" if composite_key

  @output_stream.puts column_definitions.join(",\n")
  @output_stream.puts ");"
  @output_stream.puts "" if indexes.present?
  indexes.each { |index| generate_index(name, index) }
end

# Resolves the primary key of `table_name` into `[primary_key, composite_key]`.
#
# The configured key wins over the core database key; virtual tables without
# a configured key have none. A key given as a one-element array is unwrapped
# to its single column name, so `primary_key` is an Array only when
# `composite_key` is true. Previously a one-element array (or an Array
# returned by the connection) was treated as a single key and then failed the
# column lookup with NoMethodError.
def resolve_primary_key(table_name, column_records)
  configured = primary_key(table_name)

  key_names =
    if configured.present?
      known_names = column_records.map(&:name)
      Array(configured).each do |pk|
        unless known_names.include?(pk)
          raise "Column named '#{pk}' does not exist in table '#{table_name}'"
        end
      end
    elsif virtual_table?(table_name)
      []
    else
      Array(@core_db_connection.primary_key(table_name))
    end

  key_names.length > 1 ? [key_names, true] : [key_names.first, false]
end
# Executes the generated SQL against an in-memory database so that invalid
# statements surface as an error. When nothing was generated, a warning is
# printed instead. The database handle is always closed.
def validate_migration_file
  db = Extralite::Database.new(":memory:")
  sql = @output_stream.string

  return warn("No SQL generated, skipping validation".red) if sql.blank?

  db.execute(sql)
ensure
  db&.close
end
# Re-formats the given column definition strings in place and returns the
# same array.
#
# Definitions are ordered primary key first, then NOT NULL columns, then the
# rest, alphabetically by name within each group. Names and datatypes are
# padded so that they line up in columns.
def format_columns!(column_definitions)
  pattern =
    /^\s*(?<name>\w+)\s(?<datatype>\w+)\s?(?<nullable>NOT NULL)?\s?(?<primary_key>PRIMARY KEY)?/

  parsed = column_definitions.map { |definition| definition.match(pattern).named_captures }

  name_width = parsed.map { |parts| parts["name"].length }.max
  datatype_width = parsed.map { |parts| parts["datatype"].length }.max

  # NOTE: the "nullable" capture holds the literal "NOT NULL" (or nil).
  ordered =
    parsed.sort_by do |parts|
      [parts["primary_key"] ? 0 : 1, parts["nullable"] ? 0 : 1, parts["name"]]
    end

  formatted =
    ordered.map do |parts|
      cells = [
        parts["name"].ljust(name_width),
        parts["datatype"].ljust(datatype_width),
        parts["nullable"],
        parts["primary_key"],
      ]
      # Missing (nil) cells become empty strings; rstrip drops the padding.
      " #{cells.join(" ")}".rstrip
    end

  column_definitions.replace(formatted)
end
# A column described in the configuration. It exposes the `name`, `type` and
# `null` readers that the generator reads from column records.
class CustomColumn
  attr_reader :name

  # @param name [String] column name
  # @param type [String, Symbol, nil] configured type; nil means "not configured"
  # @param null [Boolean, nil] configured nullability; nil means "not configured"
  def initialize(name, type, null)
    @name, @raw_type, @raw_null = name, type, null
  end

  # Configured type as a symbol; `:text` when no type was configured.
  def type
    return :text if @raw_type.nil?

    @raw_type.to_sym || :text
  end

  # Configured nullability; `true` when none was configured.
  def null
    return true if @raw_null.nil?

    @raw_null
  end

  # Fills in type and nullability that were not configured from
  # `other_column` and returns self. Configured values are kept.
  def merge!(other_column)
    @raw_null = other_column.null if @raw_null.nil?
    @raw_type = other_column.type unless @raw_type
    self
  end
end
# Returns the column records for a table.
#
# Virtual tables consist only of their configured extension columns. For core
# tables, an extension with the same name as a core column replaces that
# column (after taking over whatever it did not configure itself); the
# remaining extensions are appended after the core columns.
def columns(name)
  extensions = column_extensions(name)
  return extensions if virtual_table?(name)

  default_columns = @core_db_connection.columns(name)
  return default_columns if extensions.blank?

  merged_columns =
    default_columns.map do |default_column|
      position = extensions.index { |ext| ext.name == default_column.name }
      # Matched extensions are removed so that only unmatched ones get appended.
      position ? extensions.delete_at(position).merge!(default_column) : default_column
    end

  merged_columns + extensions
end
# Builds a CustomColumn for each entry of the table's `extend` configuration
# (keys `:name`, `:type`, `:is_null`). Returns an empty array when the table
# has no `extend` configuration.
def column_extensions(name)
  definitions = @table_configs.dig(name, :extend)

  if definitions.nil?
    []
  else
    definitions.map do |definition|
      CustomColumn.new(definition[:name], definition[:type], definition[:is_null])
    end
  end
end
# Maps a column's type to the datatype name used in the generated SQL:
# `:string` and `:inet` become "TEXT", anything else is the upper-cased
# type name.
def type(column)
  column_type = column.type
  return "TEXT" if %i[string inet].include?(column_type)

  column_type.to_s.upcase
end
# True when `name` is a table known to the core database connection or is
# configured as a virtual table.
def valid_table?(name)
  return true if @core_db_connection.tables.include?(name.to_s)

  virtual_table?(name)
end
# True when the table's configuration has a truthy `virtual` value.
def virtual_table?(name)
  @table_configs.dig(name, :virtual) ? true : false
end
# Returns the index definitions configured for the table, or an empty array
# when none are configured.
def indexes(table_name)
  configured = @table_configs.dig(table_name, :indexes)
  configured || []
end
# Returns the `primary_key` value configured for the table, or nil when the
# table has no such configuration.
def primary_key(table_name)
  lookup_path = [table_name, :primary_key]
  @table_configs.dig(*lookup_path)
end
end
end
# Script entry point. The first command line argument, when given, is passed
# as `output_file_path` and takes precedence over the path from the
# configuration file; without an argument `ARGV.first` is nil and the
# configured path is used.
::Migrations::SchemaGenerator.new(output_file_path: ARGV.first).run