discourse/script/import_scripts/higher_logic.rb
David Taylor 8a5d97ef3f
DEV: Update importers from PostUpload to UploadReference ()
Discourse stopped using PostUpload in 9db8f00b3d. Since then, these importers have been writing to the table, but any data was totally unused. This commit updates the easy cases to use UploadReference, and adds an error to the discourse_merger import script, which needs more significant work.
2023-09-27 15:01:04 +01:00

216 lines
6.0 KiB
Ruby

# frozen_string_literal: true
require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
class ImportScripts::HigherLogic < ImportScripts::Base
# Name of the MySQL database that the client created in `initialize` connects to.
HIGHERLOGIC_DB = "higherlogic"
# Number of rows requested per paginated query (the LIMIT in the batched imports).
BATCH_SIZE = 1000
# Directory in which `import_attachments` looks for attachment files on disk.
ATTACHMENT_DIR = "/shared/import/data/attachments"
# Sets up the importer: runs the base-class initializer, then creates the
# Mysql2 client (stored in @client) that `mysql_query` sends every query through.
def initialize
  super
  connection_options = { host: "localhost", username: "root", database: HIGHERLOGIC_DB }
  @client = Mysql2::Client.new(connection_options)
end
# Runs every import step, one after another.
#
# The order matters: group memberships look up imported users and groups,
# posts look up imported users and categories, and attachments look up
# imported posts.
def execute
  steps = %i[
    import_groups
    import_users
    import_group_users
    import_categories
    import_posts
    import_attachments
  ]
  steps.each { |step| send(step) }
end
# Reads every row of the Community table (ordered by name) and hands the rows
# to `create_groups`, mapping CommunityKey to the import id and CommunityName
# to the group name.
def import_groups
  puts "", "importing groups"

  communities = mysql_query(<<~SQL)
    SELECT CommunityKey, CommunityName
    FROM Community
    ORDER BY CommunityName
  SQL

  create_groups(communities) do |community|
    { id: community["CommunityKey"], name: community["CommunityName"] }
  end
end
# Imports rows of the Contact table as users, BATCH_SIZE rows per query.
#
# Paging stops at the first empty batch; a batch whose ContactKeys are all
# already imported is skipped. For a contact with a blank EmailAddress the
# mapping block yields nil (presumably `create_users`, defined in the base
# class, skips such entries).
def import_users
  puts "", "importing users"

  # The aggregate must be aliased: without it MySQL names the column
  # "count(*)", so looking up "count" would return nil and the progress total
  # would be lost.
  total_count = mysql_query("SELECT count(*) count FROM Contact").first["count"]

  batches(BATCH_SIZE) do |offset|
    # ORDER BY keeps LIMIT/OFFSET paging stable; without it the server may
    # return rows in a different order per query, repeating or missing rows.
    results = mysql_query <<~SQL
      SELECT ContactKey, FirstName, LastName, EmailAddress, HLAdminFlag, UserStatus, CreatedOn, Birthday, Bio
      FROM Contact
      ORDER BY ContactKey
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if results.size < 1
    next if all_records_exist?(:users, results.map { |u| u["ContactKey"] })

    create_users(results, total: total_count, offset: offset) do |user|
      next if user["EmailAddress"].blank?

      {
        id: user["ContactKey"],
        email: user["EmailAddress"],
        name: "#{user["FirstName"]} #{user["LastName"]}",
        # A missing CreatedOn is passed on as 0 (unchanged from the original mapping).
        created_at: user["CreatedOn"].nil? ? 0 : Time.zone.at(user["CreatedOn"]),
        bio_raw: user["Bio"],
        active: user["UserStatus"] == "Active",
        admin: user["HLAdminFlag"] == 1,
      }
    end
  end
end
# Recreates group memberships from the CommunityMember table.
#
# Each row is resolved to an already-imported user and group; rows whose user
# or group has no imported counterpart are skipped. For every resolved row a
# blank line and a "." are printed and the GroupUser record is found or created.
def import_group_users
  puts "", "importing group users"

  memberships = mysql_query(<<~SQL).to_a
    SELECT CommunityKey, ContactKey
    FROM CommunityMember
  SQL

  memberships.each do |membership|
    user_id = user_id_from_imported_user_id(membership["ContactKey"])
    next unless user_id

    group_id = group_id_from_imported_group_id(membership["CommunityKey"])
    next unless group_id

    puts "", "."
    GroupUser.find_or_create_by(user_id: user_id, group_id: group_id)
  end
end
# Reads every row of the Discussion table and hands the rows to
# `create_categories`, mapping DiscussionKey to the import id and
# DiscussionName to the category name.
def import_categories
  puts "", "importing categories"

  discussions = mysql_query(<<~SQL)
    SELECT DiscussionKey, DiscussionName
    FROM Discussion
  SQL

  create_categories(discussions) do |discussion|
    { id: discussion["DiscussionKey"], name: discussion["DiscussionName"] }
  end
end
# Imports DiscussionPost rows created after 2020-01-01 as topics and replies,
# BATCH_SIZE rows per query.
#
# A row without a ParentMessageKey becomes a topic (category, title, pinned
# flag); any other row becomes a reply in the topic of its parent post. If the
# parent has not been imported, a message is printed and the mapping block
# yields nil for that row.
def import_posts
  puts "", "importing topics and posts"

  # The aggregate is aliased so the "count" lookup finds it (MySQL would
  # otherwise name the column "count(*)"), and it carries the same CreatedOn
  # filter as the batch query so the progress total matches what is imported.
  total_count = mysql_query(<<~SQL).first["count"]
    SELECT count(*) count
    FROM DiscussionPost
    WHERE CreatedOn > '2020-01-01 00:00:00'
  SQL

  batches(BATCH_SIZE) do |offset|
    # ORDER BY makes LIMIT/OFFSET paging stable and returns older posts first,
    # so a parent post is normally imported before its replies look it up.
    results = mysql_query <<~SQL
      SELECT MessageKey,
             ParentMessageKey,
             Subject,
             ContactKey,
             DiscussionKey,
             PinnedFlag,
             Body,
             CreatedOn
      FROM DiscussionPost
      WHERE CreatedOn > '2020-01-01 00:00:00'
      ORDER BY CreatedOn, MessageKey
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if results.size < 1
    next if all_records_exist?(:posts, results.map { |p| p["MessageKey"] })

    create_posts(results, total: total_count, offset: offset) do |post|
      mapped = {
        id: post["MessageKey"],
        user_id: user_id_from_imported_user_id(post["ContactKey"]),
        raw: preprocess_raw(post["Body"]),
        created_at: Time.zone.at(post["CreatedOn"]),
      }

      if post["ParentMessageKey"].nil?
        # No parent: this row starts a new topic.
        mapped[:category] = category_id_from_imported_category_id(post["DiscussionKey"]).to_i
        mapped[:title] = CGI.unescapeHTML(post["Subject"])
        mapped[:pinned] = post["PinnedFlag"] == 1
      else
        topic = topic_lookup_from_imported_post_id(post["ParentMessageKey"])
        if topic.present?
          mapped[:topic_id] = topic[:topic_id]
        else
          puts "Parent post #{post["ParentMessageKey"]} doesn't exist. Skipping."
          next
        end
      end

      mapped
    end
  end
end
# Attaches files from ATTACHMENT_DIR to the imported posts they belong to.
#
# Attachment rows come from LibraryEntryFile joined to DiscussionPost (posts
# created after 2020-01-01), read BATCH_SIZE rows per query. For each row the
# file "<VersionName>.<FileExtension>" is looked up on disk; if the file and the
# imported post both exist, the file is uploaded, the upload markup is appended
# to the post's raw text, and an UploadReference is ensured for the post.
# Rows whose file or post is missing are skipped.
def import_attachments
  puts "", "importing attachments"

  count = 0
  total_attachments = mysql_query(<<~SQL).first["count"]
    SELECT COUNT(*) count
    FROM LibraryEntryFile l
    JOIN DiscussionPost p ON p.AttachmentDocumentKey = l.DocumentKey
    WHERE p.CreatedOn > '2020-01-01 00:00:00'
  SQL

  batches(BATCH_SIZE) do |offset|
    # Plain JOIN, matching the count query above: the WHERE clause on p already
    # discards rows without a post. ORDER BY keeps LIMIT/OFFSET paging stable.
    attachments = mysql_query(<<~SQL).to_a
      SELECT l.VersionName,
             l.FileExtension,
             p.MessageKey
      FROM LibraryEntryFile l
      JOIN DiscussionPost p ON p.AttachmentDocumentKey = l.DocumentKey
      WHERE p.CreatedOn > '2020-01-01 00:00:00'
      ORDER BY p.MessageKey, l.VersionName, l.FileExtension
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if attachments.empty?

    attachments.each do |a|
      print_status(count += 1, total_attachments, get_start_time("attachments"))

      original_filename = "#{a["VersionName"]}.#{a["FileExtension"]}"
      path = File.join(ATTACHMENT_DIR, original_filename)
      next unless File.exist?(path)

      # find_by returns nil for a post that was never imported; Post.find would
      # raise and abort the whole attachment import.
      post_id = post_id_from_imported_post_id(a["MessageKey"])
      post = post_id && Post.find_by(id: post_id)
      next unless post

      filename = File.basename(original_filename)
      upload = create_upload(post.user.id, path, filename)
      next unless upload&.persisted?

      html = html_for_upload(upload, filename)
      # Skip the append when the markup is already there, so re-running the
      # import does not duplicate attachments in the post body.
      unless post.raw.include?(html)
        post.raw << "\n\n" << html
        post.save!
      end
      UploadReference.ensure_exist!(upload_ids: [upload.id], target: post)
    end
  end
end
# Prepares a source post body for import: drops quoted email threading, then
# converts the remainder with HtmlToMarkdown.
#
# Everything from the first "------" separator onward is removed; the separator
# itself is not kept. A body without a separator is converted whole, and a nil
# body is treated as an empty string.
#
# @param body [String, nil] post body as read from the source database
# @return the value of HtmlToMarkdown#to_markdown (presumably a String)
def preprocess_raw(body)
  raw = body.to_s.dup

  # Exclusive range: stop right before the separator. An inclusive 0..index
  # would leave its first "-" at the end of the text.
  cutoff = raw.index("------")
  raw = raw[0...cutoff] if cutoff

  HtmlToMarkdown.new(raw).to_markdown
end
# Sends +sql+ to the client stored in @client with the cache_rows option
# turned off, and returns whatever the client's `query` returns.
def mysql_query(sql)
  query_options = { cache_rows: false }
  @client.query(sql, **query_options)
end
end
# Script entry point: build the importer and start the run when this file is
# executed. `perform` is not defined in this file; presumably it is inherited
# from ImportScripts::Base and ends up calling `execute` — confirm in base.rb.
ImportScripts::HigherLogic.new.perform