diff --git a/script/import_scripts/base/generic_database.rb b/script/import_scripts/base/generic_database.rb new file mode 100644 index 00000000000..5b7df59cf4b --- /dev/null +++ b/script/import_scripts/base/generic_database.rb @@ -0,0 +1,220 @@ +require 'sqlite3' + +module ImportScripts + class GenericDatabase + def initialize(directory, batch_size:, recreate: false) + filename = "#{directory}/index.db" + File.delete(filename) if recreate && File.exists?(filename) + + @db = SQLite3::Database.new(filename, results_as_hash: true) + @batch_size = batch_size + + configure_database + create_category_table + create_user_table + create_topic_table + create_post_table + end + + def insert_category(category) + @db.execute(<<-SQL, prepare(category)) + INSERT OR REPLACE INTO category (id, name, description, position, url) + VALUES (:id, :name, :description, :position, :url) + SQL + end + + def insert_user(user) + @db.execute(<<-SQL, prepare(user)) + INSERT OR REPLACE INTO user (id, email, username, name, created_at, last_seen_at, active) + VALUES (:id, :email, :username, :name, :created_at, :last_seen_at, :active) + SQL + end + + def insert_topic(topic) + @db.execute(<<-SQL, prepare(topic)) + INSERT OR REPLACE INTO topic (id, title, raw, category_id, closed, user_id, created_at, url) + VALUES (:id, :title, :raw, :category_id, :closed, :user_id, :created_at, :url) + SQL + end + + def insert_post(post) + @db.execute(<<-SQL, prepare(post)) + INSERT OR REPLACE INTO post (id, raw, topic_id, user_id, created_at, reply_to_post_id, url) + VALUES (:id, :raw, :topic_id, :user_id, :created_at, :reply_to_post_id, :url) + SQL + end + + def sort_posts_by_created_at + @db.execute 'DELETE FROM post_order' + + @db.execute <<-SQL + INSERT INTO post_order (id) + SELECT id + FROM post + ORDER BY created_at, topic_id, id + SQL + end + + def fetch_categories + @db.execute(<<-SQL) + SELECT * + FROM category + ORDER BY position, name + SQL + end + + def count_users + @db.get_first_value(<<-SQL) + SELECT COUNT(*) + FROM user + SQL + end + + def fetch_users(last_id) + rows = @db.execute(<<-SQL, last_id) + SELECT * + FROM user + WHERE id > :last_id + ORDER BY id + LIMIT #{@batch_size} + SQL + + add_last_column_value(rows, 'id') + end + + def count_topics + @db.get_first_value(<<-SQL) + SELECT COUNT(*) + FROM topic + SQL + end + + def fetch_topics(last_id) + rows = @db.execute(<<-SQL, last_id) + SELECT * + FROM topic + WHERE id > :last_id + ORDER BY id + LIMIT #{@batch_size} + SQL + + add_last_column_value(rows, 'id') + end + + def count_posts + @db.get_first_value(<<-SQL) + SELECT COUNT(*) + FROM post + SQL + end + + def fetch_posts(last_row_id) + rows = @db.execute(<<-SQL, last_row_id) + SELECT o.ROWID, p.* + FROM post p + JOIN post_order o USING (id) + WHERE o.ROWID > :last_row_id + ORDER BY o.ROWID + LIMIT #{@batch_size} + SQL + + add_last_column_value(rows, 'rowid') + end + + def execute_sql(sql) + @db.execute(sql) + end + + private + + def configure_database + @db.execute 'PRAGMA journal_mode = OFF' + @db.execute 'PRAGMA locking_mode = EXCLUSIVE' + end + + def create_category_table + @db.execute <<-SQL + CREATE TABLE IF NOT EXISTS category ( + id TEXT NOT NULL PRIMARY KEY, + name TEXT NOT NULL, + description TEXT, + position INTEGER, + url TEXT + ) + SQL + end + + def create_user_table + @db.execute <<-SQL + CREATE TABLE IF NOT EXISTS user ( + id TEXT NOT NULL PRIMARY KEY, + email TEXT, + username TEXT, + name TEXT, + created_at DATETIME, + last_seen_at DATETIME, + active BOOLEAN NOT NULL DEFAULT true + ) + SQL + end + + def create_topic_table + @db.execute <<-SQL + CREATE TABLE IF NOT EXISTS topic ( + id TEXT NOT NULL PRIMARY KEY, + title TEXT, + raw TEXT, + category_id TEXT NOT NULL, + closed BOOLEAN NOT NULL DEFAULT false, + user_id TEXT NOT NULL, + created_at DATETIME, + url TEXT + ) + SQL + + @db.execute 'CREATE INDEX IF NOT EXISTS topic_by_user_id ON topic (user_id)' + end + + def create_post_table + @db.execute <<-SQL + CREATE TABLE IF NOT EXISTS post ( + id TEXT NOT NULL PRIMARY KEY, + raw TEXT, + topic_id TEXT NOT NULL, + user_id TEXT NOT NULL, + created_at DATETIME, + reply_to_post_id TEXT, + url TEXT + ) + SQL + + @db.execute 'CREATE INDEX IF NOT EXISTS post_by_user_id ON post (user_id)' + + @db.execute <<-SQL + CREATE TABLE IF NOT EXISTS post_order ( + id TEXT NOT NULL PRIMARY KEY + ) + SQL + end + + def prepare(hash) + hash.each do |key, value| + if value.is_a?(TrueClass) || value.is_a?(FalseClass) + hash[key] = value ? 1 : 0 + elsif value.is_a?(Date) + hash[key] = value.to_s + end + end + end + + def add_last_column_value(rows, *last_columns) + return rows if last_columns.empty? + + result = [rows] + last_row = rows.last + + last_columns.each { |column| result.push(last_row ? last_row[column] : nil) } + result + end + end +end diff --git a/script/import_scripts/zendesk.rb b/script/import_scripts/zendesk.rb new file mode 100644 index 00000000000..305431c7a52 --- /dev/null +++ b/script/import_scripts/zendesk.rb @@ -0,0 +1,248 @@ +# Zendesk importer +# +# You will need a bunch of CSV files: +# +# - users.csv +# - topics.csv (topics in Zendesk are categories in Discourse) +# - posts.csv (posts in Zendesk are topics in Discourse) +# - comments.csv (comments in Zendesk are posts in Discourse) + +require 'csv' +require 'reverse_markdown' +require_relative 'base' +require_relative 'base/generic_database' + +# Call it like this: +# RAILS_ENV=production bundle exec ruby script/import_scripts/zendesk.rb DIRNAME +class ImportScripts::Zendesk < ImportScripts::Base + OLD_DOMAIN = "https://support.example.com" + BATCH_SIZE = 1000 + + def initialize(path) + super() + + @path = path + @db = ImportScripts::GenericDatabase.new(@path, batch_size: BATCH_SIZE, recreate: true) + end + + def execute + read_csv_files + + import_categories + import_users + import_topics + import_posts + end + + def read_csv_files + puts "", "reading CSV files" + + csv_parse("topics") do |row| + @db.insert_category( + id: row[:id], + name: row[:name], + description: row[:description], + position: row[:position], + url: row[:htmlurl] + ) + end + + csv_parse("users") do |row| + @db.insert_user( + id: row[:id], + email: row[:email], + name: row[:name], + created_at: parse_datetime(row[:createdat]), + last_seen_at: parse_datetime(row[:lastloginat]), + active: true + ) + end + + csv_parse("posts") do |row| + @db.insert_topic( + id: row[:id], + title: row[:title], + raw: row[:details], + category_id: row[:topicid], + closed: row[:closed] == "TRUE", + user_id: row[:authorid], + created_at: parse_datetime(row[:createdat]), + url: row[:htmlurl] + ) + end + + csv_parse("comments") do |row| + @db.insert_post( + id: row[:id], + raw: row[:body], + topic_id: row[:postid], + user_id: row[:authorid], + created_at: parse_datetime(row[:createdat]), + url: row[:htmlurl] + ) + end + + @db.execute_sql(<<~SQL) + DELETE FROM user + WHERE NOT EXISTS( + SELECT 1 + FROM topic + WHERE topic.user_id = user.id + ) AND NOT EXISTS( + SELECT 1 + FROM post + WHERE post.user_id = user.id + ) + SQL + + @db.sort_posts_by_created_at + end + + def parse_datetime(text) + return nil if text.blank? || text == "null" + DateTime.parse(text) + end + + def import_categories + puts "", "creating categories" + rows = @db.fetch_categories + + create_categories(rows) do |row| + { + id: row['id'], + name: row['name'], + description: row['description'], + position: row['position'], + post_create_action: proc do |category| + url = remove_domain(row['url']) + Permalink.create(url: url, category_id: category.id) unless permalink_exists?(url) + end + } + end + end + + def batches + super(BATCH_SIZE) + end + + def import_users + puts "", "creating users" + total_count = @db.count_users + last_id = '' + + batches do |offset| + rows, last_id = @db.fetch_users(last_id) + break if rows.empty? + + next if all_records_exist?(:users, rows.map { |row| row['id'] }) + + create_users(rows, total: total_count, offset: offset) do |row| + { + id: row['id'], + email: row['email'], + name: row['name'], + created_at: row['created_at'], + last_seen_at: row['last_seen_at'], + active: row['active'] == 1 + } + end + end + end + + def import_topics + puts "", "creating topics" + total_count = @db.count_topics + last_id = '' + + batches do |offset| + rows, last_id = @db.fetch_topics(last_id) + break if rows.empty? + + next if all_records_exist?(:posts, rows.map { |row| import_topic_id(row['id']) }) + + create_posts(rows, total: total_count, offset: offset) do |row| + { + id: import_topic_id(row['id']), + title: row['title'].present? ? row['title'].strip[0...255] : "Topic title missing", + raw: normalize_raw(row['raw']), + category: category_id_from_imported_category_id(row['category_id']), + user_id: user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id, + created_at: row['created_at'], + closed: row['closed'] == 1, + post_create_action: proc do |post| + url = remove_domain(row['url']) + Permalink.create(url: url, topic_id: post.topic.id) unless permalink_exists?(url) + end + } + end + end + end + + def import_topic_id(topic_id) + "T#{topic_id}" + end + + def import_posts + puts "", "creating posts" + total_count = @db.count_posts + last_row_id = 0 + + batches do |offset| + rows, last_row_id = @db.fetch_posts(last_row_id) + break if rows.empty? + + next if all_records_exist?(:posts, rows.map { |row| row['id'] }) + + create_posts(rows, total: total_count, offset: offset) do |row| + topic = topic_lookup_from_imported_post_id(import_topic_id(row['topic_id'])) + + if topic.nil? + p "MISSING TOPIC #{row['topic_id']}" + p row + next + end + + { + id: import_topic_id(row['id']), + raw: normalize_raw(row['raw']), + user_id: user_id_from_imported_user_id(row['user_id']) || Discourse.system_user.id, + topic_id: topic[:topic_id], + created_at: row['created_at'], + post_create_action: proc do |post| + url = remove_domain(row['url']) + Permalink.create(url: url, post_id: post.id) unless permalink_exists?(url) + end + } + end + end + end + + def normalize_raw(raw) + raw = raw.gsub('\n', '') + raw = ReverseMarkdown.convert(raw) + raw + end + + def remove_domain(url) + url.sub(OLD_DOMAIN, "") + end + + def permalink_exists?(url) + Permalink.find_by(url: url) + end + + def csv_parse(table_name) + CSV.foreach(File.join(@path, "#{table_name}.csv"), + headers: true, + header_converters: :symbol, + skip_blanks: true, + encoding: 'bom|utf-8') { |row| yield row } + end +end + +unless ARGV[0] && Dir.exist?(ARGV[0]) + puts "", "Usage:", "", "bundle exec ruby script/import_scripts/zendesk.rb DIRNAME", "" + exit 1 +end + +ImportScripts::Zendesk.new(ARGV[0]).perform