mirror of
https://github.com/discourse/discourse.git
synced 2024-11-23 15:05:24 +08:00
8a5d97ef3f
Discourse stopped using PostUpload in 9db8f00b3d. Since then, these importers have been writing to the table, but the data was never used. This commit updates the easy cases to use UploadReference, and adds an error to the discourse_merger import script, which needs more significant work.
216 lines
6.0 KiB
Ruby
216 lines
6.0 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require "mysql2"
|
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
|
|
|
class ImportScripts::HigherLogic < ImportScripts::Base
|
|
# Name of the MySQL database that holds the Higher Logic export.
HIGHERLOGIC_DB = "higherlogic"

# Number of rows requested per LIMIT/OFFSET page in the batched imports.
BATCH_SIZE = 1000

# Directory searched for attachment files named "<VersionName>.<FileExtension>".
ATTACHMENT_DIR = "/shared/import/data/attachments"
|
|
|
|
# Runs the base importer setup, then opens the MySQL connection that
# `mysql_query` uses: database HIGHERLOGIC_DB on localhost, user "root",
# no password supplied.
def initialize
  super

  connection_settings = { host: "localhost", username: "root", database: HIGHERLOGIC_DB }
  @client = Mysql2::Client.new(connection_settings)
end
|
|
|
|
# Runs the import stages one after another.
#
# The order matters: group memberships look up already-imported users and
# groups, posts look up users and categories, and attachments look up posts.
#
# Returns whatever the final stage (import_attachments) returns.
def execute
  stages = %i[
    import_groups
    import_users
    import_group_users
    import_categories
    import_posts
    import_attachments
  ]

  stages.map { |stage| send(stage) }.last
end
|
|
|
|
# Reads every Community row (ordered by name) and hands it to `create_groups`,
# mapping CommunityKey to the import id and CommunityName to the group name.
def import_groups
  puts "", "importing groups"

  community_rows = mysql_query(<<~SQL)
    SELECT CommunityKey, CommunityName
    FROM Community
    ORDER BY CommunityName
  SQL

  create_groups(community_rows) do |community|
    { id: community["CommunityKey"], name: community["CommunityName"] }
  end
end
|
|
|
|
# Imports Contact rows as users, BATCH_SIZE rows at a time.
#
# For each batch the rows are passed to `create_users`; the block yields nil
# for contacts without an email address (presumably skipped by the base
# importer) and otherwise a hash with id, email, name, created_at, bio_raw,
# active and admin.
def import_users
  puts "", "importing users"

  # The aggregate must be aliased: without it MySQL names the column
  # "count(*)" and `["count"]` would always be nil.
  total_count = mysql_query("SELECT COUNT(*) count FROM Contact").first["count"]

  batches(BATCH_SIZE) do |offset|
    # ORDER BY keeps LIMIT/OFFSET pages stable; without it MySQL may return
    # rows in a different order per query, duplicating or skipping contacts.
    results = mysql_query(<<~SQL)
      SELECT ContactKey, FirstName, LastName, EmailAddress, HLAdminFlag, UserStatus, CreatedOn, Birthday, Bio
      FROM Contact
      ORDER BY ContactKey
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if results.size < 1

    next if all_records_exist?(:users, results.map { |u| u["ContactKey"] })

    create_users(results, total: total_count, offset: offset) do |user|
      next if user["EmailAddress"].blank?

      {
        id: user["ContactKey"],
        email: user["EmailAddress"],
        name: "#{user["FirstName"]} #{user["LastName"]}",
        # A missing CreatedOn is passed on as 0, as before.
        created_at: user["CreatedOn"].nil? ? 0 : Time.zone.at(user["CreatedOn"]),
        bio_raw: user["Bio"],
        active: user["UserStatus"] == "Active",
        admin: user["HLAdminFlag"] == 1,
      }
    end
  end
end
|
|
|
|
# Recreates community memberships as GroupUser records.
#
# Each CommunityMember row is translated through the imported-id lookups;
# rows whose user or group was not imported are skipped.
def import_group_users
  puts "", "importing group users"

  group_users = mysql_query(<<~SQL).to_a
    SELECT CommunityKey, ContactKey
    FROM CommunityMember
  SQL

  group_users.each do |row|
    next unless user_id = user_id_from_imported_user_id(row["ContactKey"])
    next unless group_id = group_id_from_imported_group_id(row["CommunityKey"])

    # One progress dot per membership on a single line. The previous
    # `puts "", "."` emitted a blank line plus a dot line for every row.
    print "."

    GroupUser.find_or_create_by(user_id: user_id, group_id: group_id)
  end
end
|
|
|
|
# Reads every Discussion row and hands it to `create_categories`, mapping
# DiscussionKey to the import id and DiscussionName to the category name.
def import_categories
  puts "", "importing categories"

  discussion_rows = mysql_query(<<~SQL)
    SELECT DiscussionKey, DiscussionName
    FROM Discussion
  SQL

  create_categories(discussion_rows) do |discussion|
    { id: discussion["DiscussionKey"], name: discussion["DiscussionName"] }
  end
end
|
|
|
|
# Imports DiscussionPost rows created after 2020-01-01 as topics and replies,
# BATCH_SIZE rows at a time.
#
# A row without ParentMessageKey becomes a topic (category, title and pinned
# flag are set). Any other row becomes a reply in the topic of its parent
# post; if the parent has not been imported the row is reported and skipped.
def import_posts
  puts "", "importing topics and posts"

  # Alias the aggregate so the column really is called "count", and apply
  # the same date filter as the batch query so the progress total matches.
  total_count = mysql_query(<<~SQL).first["count"]
    SELECT COUNT(*) count
    FROM DiscussionPost
    WHERE CreatedOn > '2020-01-01 00:00:00'
  SQL

  batches(BATCH_SIZE) do |offset|
    # Ordering by creation time keeps LIMIT/OFFSET pages stable and makes
    # parents come before the replies that need to look them up.
    results = mysql_query(<<~SQL)
      SELECT MessageKey,
             ParentMessageKey,
             Subject,
             ContactKey,
             DiscussionKey,
             PinnedFlag,
             Body,
             CreatedOn
      FROM DiscussionPost
      WHERE CreatedOn > '2020-01-01 00:00:00'
      ORDER BY CreatedOn, MessageKey
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if results.size < 1
    next if all_records_exist?(:posts, results.map { |p| p["MessageKey"] })

    create_posts(results, total: total_count, offset: offset) do |post|
      raw = preprocess_raw(post["Body"])
      mapped = {
        id: post["MessageKey"],
        user_id: user_id_from_imported_user_id(post["ContactKey"]),
        raw: raw,
        created_at: Time.zone.at(post["CreatedOn"]),
      }

      if post["ParentMessageKey"].nil?
        mapped[:category] = category_id_from_imported_category_id(post["DiscussionKey"]).to_i
        # to_s: CGI.unescapeHTML raises on nil when Subject is NULL.
        mapped[:title] = CGI.unescapeHTML(post["Subject"].to_s)
        mapped[:pinned] = post["PinnedFlag"] == 1
      else
        topic = topic_lookup_from_imported_post_id(post["ParentMessageKey"])

        if topic.present?
          mapped[:topic_id] = topic[:topic_id]
        else
          puts "Parent post #{post["ParentMessageKey"]} doesn't exist. Skipping."
          next
        end
      end

      mapped
    end
  end
end
|
|
|
|
# Attaches files from ATTACHMENT_DIR to the imported posts they belong to.
#
# For every LibraryEntryFile joined to a DiscussionPost created after
# 2020-01-01, the file "<VersionName>.<FileExtension>" is looked up on disk.
# If it exists and the post was imported, an upload is created, its markup is
# appended to the post's raw text and an UploadReference is recorded.
# Missing files, unimported posts and failed uploads are skipped.
def import_attachments
  puts "", "importing attachments"

  count = 0

  total_attachments = mysql_query(<<~SQL).first["count"]
    SELECT COUNT(*) count
    FROM LibraryEntryFile l
    JOIN DiscussionPost p ON p.AttachmentDocumentKey = l.DocumentKey
    WHERE p.CreatedOn > '2020-01-01 00:00:00'
  SQL

  batches(BATCH_SIZE) do |offset|
    # Plain JOIN matches the count query (the WHERE on p.CreatedOn already
    # discarded unmatched LEFT JOIN rows). ORDER BY keeps pages stable.
    attachments = mysql_query(<<~SQL).to_a
      SELECT l.VersionName,
             l.FileExtension,
             p.MessageKey
      FROM LibraryEntryFile l
      JOIN DiscussionPost p ON p.AttachmentDocumentKey = l.DocumentKey
      WHERE p.CreatedOn > '2020-01-01 00:00:00'
      ORDER BY p.MessageKey, l.VersionName, l.FileExtension
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset}
    SQL

    break if attachments.empty?

    attachments.each do |a|
      print_status(count += 1, total_attachments, get_start_time("attachments"))

      original_filename = "#{a["VersionName"]}.#{a["FileExtension"]}"
      path = File.join(ATTACHMENT_DIR, original_filename)
      next unless File.exist?(path)

      # find_by returns nil for posts that were never imported, whereas
      # Post.find would raise and abort the whole import.
      post = Post.find_by(id: post_id_from_imported_post_id(a["MessageKey"]))
      next unless post

      filename = File.basename(original_filename)
      # user_id avoids loading the user and does not crash if it is missing.
      upload = create_upload(post.user_id, path, filename)
      next unless upload&.persisted?

      html = html_for_upload(upload, filename)

      # Only append once so re-running the import does not duplicate the
      # attachment markup in the post body.
      unless post.raw.include?(html)
        post.raw << "\n\n" << html
        post.save!
      end

      UploadReference.ensure_exist!(upload_ids: [upload.id], target: post)
    end
  end
end
|
|
|
|
# Converts a Higher Logic post body (HTML) into Markdown for Discourse.
#
# Everything from the first "------" onwards is dropped to remove quoted
# email threading before the HTML is converted.
#
# @param body [String, nil] raw HTML body; nil is treated as an empty body
# @return the result of HtmlToMarkdown#to_markdown for the trimmed body
def preprocess_raw(body)
  # partition returns the whole string when the separator is absent and
  # excludes the separator itself when present. The previous inclusive
  # slice (0..index) left a stray "-" at the end of the post.
  text_before_thread = body.to_s.partition("------").first

  HtmlToMarkdown.new(text_before_thread).to_markdown
end
|
|
|
|
# Runs +sql+ on the connection opened in `initialize` and returns the result.
# Row caching is turned off via the `cache_rows: false` query option.
def mysql_query(sql)
  query_options = { cache_rows: false }
  @client.query(sql, query_options)
end
|
|
end
|
|
|
|
# Script entry point: build the importer and start it. `perform` is not
# defined in this file; presumably ImportScripts::Base provides it and it
# ends up calling `execute` (confirm against base.rb).
ImportScripts::HigherLogic.new.perform
|