mirror of
https://github.com/discourse/discourse.git
synced 2025-01-23 02:11:44 +08:00
584 lines
17 KiB
Ruby
584 lines
17 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require "mysql2"
|
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
|
require "htmlentities"
|
|
|
|
class ImportScripts::Modx < ImportScripts::Base
|
|
BATCH_SIZE = 1000
|
|
|
|
# CHANGE THESE BEFORE RUNNING THE IMPORTER
|
|
|
|
DB_HOST = ENV["DB_HOST"] || "localhost"
|
|
DB_NAME = ENV["DB_NAME"] || "modx"
|
|
DB_PW = ENV["DB_PW"] || "modex"
|
|
DB_USER = ENV["DB_USER"] || "modx"
|
|
TIMEZONE = ENV["TIMEZONE"] || "America/Los_Angeles"
|
|
TABLE_PREFIX = ENV["TABLE_PREFIX"] || "modx_"
|
|
ATTACHMENT_DIR = ENV["ATTACHMENT_DIR"] || "/path/to/your/attachment/folder"
|
|
RANDOM_CATEGORY_COLOR = !ENV["RANDOM_CATEGORY_COLOR"].nil?
|
|
SUSPEND_ALL_USERS = !ENV["SUSPEND_ALL_USERS"]
|
|
|
|
# TODO: replace modx_ with #{TABLE_PREFIX}
|
|
|
|
puts "#{DB_USER}:#{DB_PW}@#{DB_HOST} wants #{DB_NAME}"
|
|
|
|
def initialize
|
|
super
|
|
|
|
SiteSetting.disable_emails = "non-staff"
|
|
|
|
@old_username_to_new_usernames = {}
|
|
|
|
@tz = TZInfo::Timezone.get(TIMEZONE)
|
|
|
|
@htmlentities = HTMLEntities.new
|
|
|
|
@client =
|
|
Mysql2::Client.new(host: DB_HOST, username: DB_USER, password: DB_PW, database: DB_NAME)
|
|
rescue Exception => e
|
|
puts "=" * 50
|
|
puts e.message
|
|
puts <<~TEXT
|
|
Cannot connect in to database.
|
|
|
|
Hostname: #{DB_HOST}
|
|
Username: #{DB_USER}
|
|
Password: #{DB_PW}
|
|
database: #{DB_NAME}
|
|
|
|
Edit the script or set these environment variables:
|
|
|
|
export DB_HOST="localhost"
|
|
export DB_NAME="modx"
|
|
export DB_PW="modx"
|
|
export DB_USER="modx"
|
|
export TABLE_PREFIX="modx_"
|
|
export ATTACHMENT_DIR '/path/to/your/attachment/folder'
|
|
|
|
Exiting.
|
|
TEXT
|
|
exit
|
|
end
|
|
|
|
def execute
|
|
import_users
|
|
import_categories
|
|
import_topics_and_posts
|
|
deactivate_all_users
|
|
suspend_users
|
|
end
|
|
|
|
def not_import_groups
|
|
puts "", "importing groups..."
|
|
|
|
groups = mysql_query <<-SQL
|
|
SELECT usergroupid, title
|
|
FROM #{TABLE_PREFIX}usergroup
|
|
ORDER BY usergroupid
|
|
SQL
|
|
|
|
create_groups(groups) do |group|
|
|
{ id: group["usergroupid"], name: @htmlentities.decode(group["title"]).strip }
|
|
end
|
|
end
|
|
|
|
def import_users
|
|
puts "", "importing users"
|
|
|
|
user_count =
|
|
mysql_query("SELECT COUNT(id) count FROM #{TABLE_PREFIX}discuss_users").first["count"]
|
|
|
|
last_user_id = -1
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
users = mysql_query(<<-SQL).to_a
|
|
SELECT id as userid, email, concat (name_first, " ", name_last) as name, username,
|
|
location, website, status, last_active as last_seen_at,
|
|
createdon as created_at, birthdate as date_of_birth,
|
|
avatar as avatar_url,
|
|
password,
|
|
salt,
|
|
display_name,
|
|
use_display_name
|
|
FROM #{TABLE_PREFIX}discuss_users
|
|
WHERE id > #{last_user_id}
|
|
ORDER BY id
|
|
LIMIT #{BATCH_SIZE};
|
|
SQL
|
|
|
|
break if users.empty?
|
|
|
|
last_user_id = users[-1]["userid"]
|
|
before = users.size
|
|
users.reject! { |u| @lookup.user_already_imported?(u["userid"].to_i) }
|
|
|
|
create_users(users, total: user_count, offset: offset) do |user|
|
|
{
|
|
id: user["userid"],
|
|
name: user["name"],
|
|
username: user["username"],
|
|
email: user["email"],
|
|
website: user["website"],
|
|
created_at: parse_timestamp(user["created_at"]),
|
|
last_seen_at: parse_timestamp(user["last_seen_at"]),
|
|
date_of_birth: user["date_of_birth"],
|
|
password: "#{user["password"]}:#{user["salt"]}", # not tested
|
|
}
|
|
end
|
|
end
|
|
end
|
|
|
|
def import_categories
|
|
# import modx_discuss_categories as categories
|
|
# import modx_discuss_boards as subcategories
|
|
puts "", "importing categories..."
|
|
|
|
categories = mysql_query("select id, name, description from modx_discuss_categories").to_a
|
|
|
|
create_categories(categories) do |category|
|
|
puts "Creating #{category["name"]}"
|
|
puts category
|
|
{
|
|
id: "cat#{category["id"]}",
|
|
name: category["name"],
|
|
color: RANDOM_CATEGORY_COLOR ? (0..2).map { "%0x" % (rand * 0x80) }.join : nil,
|
|
description: category["description"],
|
|
}
|
|
end
|
|
|
|
puts "", "importing boards as subcategories..."
|
|
|
|
boards = mysql_query("select id, category, name, description from modx_discuss_boards;").to_a
|
|
create_categories(boards) do |category|
|
|
puts category
|
|
parent_category_id = category_id_from_imported_category_id("cat#{category["category"]}")
|
|
{
|
|
id: category["id"],
|
|
parent_category_id: parent_category_id.to_s,
|
|
name: category["name"],
|
|
color: RANDOM_CATEGORY_COLOR ? (0..2).map { "%0x" % (rand * 0x80) }.join : nil,
|
|
description: category["description"],
|
|
}
|
|
end
|
|
end
|
|
|
|
def import_topics_and_posts
|
|
puts "", "creating topics and posts"
|
|
|
|
total_count =
|
|
mysql_query("SELECT count(id) count from #{TABLE_PREFIX}discuss_posts").first["count"]
|
|
|
|
topic_first_post_id = {}
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
results =
|
|
mysql_query(
|
|
"
|
|
SELECT id,
|
|
thread topic_id,
|
|
board category_id,
|
|
title,
|
|
message raw,
|
|
parent,
|
|
author user_id,
|
|
createdon created_at
|
|
from modx_discuss_posts
|
|
ORDER BY createdon
|
|
LIMIT #{BATCH_SIZE}
|
|
OFFSET #{offset};
|
|
",
|
|
)
|
|
|
|
break if results.size < 1
|
|
|
|
next if all_records_exist? :posts, results.map { |m| m["id"].to_i }
|
|
|
|
create_posts(results, total: total_count, offset: offset) do |m|
|
|
skip = false
|
|
mapped = {}
|
|
|
|
mapped[:id] = m["id"]
|
|
mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1
|
|
mapped[:raw] = post_process_raw(m["raw"])
|
|
mapped[:created_at] = Time.zone.at(m["created_at"])
|
|
|
|
if m["parent"] == 0
|
|
mapped[:category] = category_id_from_imported_category_id(m["category_id"])
|
|
mapped[:title] = m["title"]
|
|
topic_first_post_id[m["topic_id"]] = m["id"]
|
|
else
|
|
parent = topic_lookup_from_imported_post_id(topic_first_post_id[m["topic_id"]])
|
|
if parent
|
|
mapped[:topic_id] = parent[:topic_id]
|
|
else
|
|
puts "Parent post #{first_post_id} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
|
|
skip = true
|
|
end
|
|
end
|
|
|
|
skip ? nil : mapped
|
|
end
|
|
end
|
|
end
|
|
|
|
def post_process_raw(raw)
|
|
# [QUOTE]...[/QUOTE]
|
|
raw =
|
|
raw.gsub(%r{\[quote.*?\](.+?)\[/quote\]}im) do |quote|
|
|
quote = quote.gsub(/\[quote author=(.*?) .+\]/i) { "\n[quote=\"#{$1}\"]\n" }
|
|
quote = quote.gsub(%r{[^\n]\[/quote\]}im) { "\n[/quote]\n" }
|
|
end
|
|
|
|
raw
|
|
end
|
|
|
|
def not_mark_topics_as_solved
|
|
puts "", "Marking topics as solved..."
|
|
|
|
DB.exec <<~SQL
|
|
INSERT INTO topic_custom_fields (name, value, topic_id, created_at, updated_at)
|
|
SELECT 'accepted_answer_post_id', pcf.post_id, p.topic_id, p.created_at, p.created_at
|
|
FROM post_custom_fields pcf
|
|
JOIN posts p ON p.id = pcf.post_id
|
|
WHERE pcf.name = 'is_accepted_answer'
|
|
SQL
|
|
end
|
|
|
|
# find the uploaded file information from the db
|
|
def not_find_upload(post, attachment_id)
|
|
sql =
|
|
"SELECT a.attachmentid attachment_id, a.userid user_id, a.filedataid file_id, a.filename filename,
|
|
a.caption caption
|
|
FROM #{TABLE_PREFIX}attachment a
|
|
WHERE a.attachmentid = #{attachment_id}"
|
|
results = mysql_query(sql)
|
|
|
|
unless row = results.first
|
|
puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields["import_id"]}"
|
|
return
|
|
end
|
|
|
|
filename =
|
|
File.join(ATTACHMENT_DIR, row["user_id"].to_s.split("").join("/"), "#{row["file_id"]}.attach")
|
|
unless File.exist?(filename)
|
|
puts "Attachment file doesn't exist: #{filename}"
|
|
return
|
|
end
|
|
|
|
real_filename = row["filename"]
|
|
real_filename.prepend SecureRandom.hex if real_filename[0] == "."
|
|
upload = create_upload(post.user.id, filename, real_filename)
|
|
|
|
if upload.nil? || !upload.valid?
|
|
puts "Upload not valid :("
|
|
puts upload.errors.inspect if upload
|
|
return
|
|
end
|
|
|
|
[upload, real_filename]
|
|
rescue Mysql2::Error => e
|
|
puts "SQL Error"
|
|
puts e.message
|
|
puts sql
|
|
end
|
|
|
|
def not_import_private_messages
|
|
puts "", "importing private messages..."
|
|
|
|
topic_count =
|
|
mysql_query("SELECT COUNT(pmtextid) count FROM #{TABLE_PREFIX}pmtext").first["count"]
|
|
|
|
last_private_message_id = -1
|
|
|
|
batches(BATCH_SIZE) do |offset|
|
|
private_messages = mysql_query(<<-SQL).to_a
|
|
SELECT pmtextid, fromuserid, title, message, touserarray, dateline
|
|
FROM #{TABLE_PREFIX}pmtext
|
|
WHERE pmtextid > #{last_private_message_id}
|
|
ORDER BY pmtextid
|
|
LIMIT #{BATCH_SIZE}
|
|
SQL
|
|
|
|
break if private_messages.empty?
|
|
|
|
last_private_message_id = private_messages[-1]["pmtextid"]
|
|
private_messages.reject! { |pm| @lookup.post_already_imported?("pm-#{pm["pmtextid"]}") }
|
|
|
|
title_username_of_pm_first_post = {}
|
|
|
|
create_posts(private_messages, total: topic_count, offset: offset) do |m|
|
|
skip = false
|
|
mapped = {}
|
|
|
|
mapped[:id] = "pm-#{m["pmtextid"]}"
|
|
mapped[:user_id] = user_id_from_imported_user_id(m["fromuserid"]) ||
|
|
Discourse::SYSTEM_USER_ID
|
|
mapped[:raw] = begin
|
|
preprocess_post_raw(m["message"])
|
|
rescue StandardError
|
|
nil
|
|
end
|
|
mapped[:created_at] = Time.zone.at(m["dateline"])
|
|
title = @htmlentities.decode(m["title"]).strip[0...255]
|
|
topic_id = nil
|
|
|
|
next if mapped[:raw].blank?
|
|
|
|
# users who are part of this private message.
|
|
target_usernames = []
|
|
target_userids = []
|
|
begin
|
|
to_user_array = PHP.unserialize(m["touserarray"])
|
|
rescue StandardError
|
|
puts "#{m["pmtextid"]} -- #{m["touserarray"]}"
|
|
skip = true
|
|
end
|
|
|
|
begin
|
|
to_user_array.each do |to_user|
|
|
if to_user[0] == "cc" || to_user[0] == "bcc" # not sure if we should include bcc users
|
|
to_user[1].each do |to_user_cc|
|
|
user_id = user_id_from_imported_user_id(to_user_cc[0])
|
|
username = User.find_by(id: user_id).try(:username)
|
|
target_userids << user_id || Discourse::SYSTEM_USER_ID
|
|
target_usernames << username if username
|
|
end
|
|
else
|
|
user_id = user_id_from_imported_user_id(to_user[0])
|
|
username = User.find_by(id: user_id).try(:username)
|
|
target_userids << user_id || Discourse::SYSTEM_USER_ID
|
|
target_usernames << username if username
|
|
end
|
|
end
|
|
rescue StandardError
|
|
puts "skipping pm-#{m["pmtextid"]} `to_user_array` is not properly serialized -- #{to_user_array.inspect}"
|
|
skip = true
|
|
end
|
|
|
|
participants = target_userids
|
|
participants << mapped[:user_id]
|
|
begin
|
|
participants.sort!
|
|
rescue StandardError
|
|
puts "one of the participant's id is nil -- #{participants.inspect}"
|
|
end
|
|
|
|
if title =~ /^Re:/
|
|
parent_id =
|
|
title_username_of_pm_first_post[[title[3..-1], participants]] ||
|
|
title_username_of_pm_first_post[[title[4..-1], participants]] ||
|
|
title_username_of_pm_first_post[[title[5..-1], participants]] ||
|
|
title_username_of_pm_first_post[[title[6..-1], participants]] ||
|
|
title_username_of_pm_first_post[[title[7..-1], participants]] ||
|
|
title_username_of_pm_first_post[[title[8..-1], participants]]
|
|
|
|
if parent_id
|
|
if t = topic_lookup_from_imported_post_id("pm-#{parent_id}")
|
|
topic_id = t[:topic_id]
|
|
end
|
|
end
|
|
else
|
|
title_username_of_pm_first_post[[title, participants]] ||= m["pmtextid"]
|
|
end
|
|
|
|
if topic_id
|
|
mapped[:topic_id] = topic_id
|
|
else
|
|
mapped[:title] = title
|
|
mapped[:archetype] = Archetype.private_message
|
|
mapped[:target_usernames] = target_usernames.join(",")
|
|
|
|
if mapped[:target_usernames].size < 1 # pm with yourself?
|
|
# skip = true
|
|
mapped[:target_usernames] = "system"
|
|
puts "pm-#{m["pmtextid"]} has no target (#{m["touserarray"]})"
|
|
end
|
|
end
|
|
|
|
skip ? nil : mapped
|
|
end
|
|
end
|
|
end
|
|
|
|
def not_import_attachments
|
|
puts "", "importing attachments..."
|
|
|
|
current_count = 0
|
|
|
|
total_count = mysql_query(<<-SQL).first["count"]
|
|
SELECT COUNT(postid) count
|
|
FROM #{TABLE_PREFIX}post p
|
|
JOIN #{TABLE_PREFIX}thread t ON t.threadid = p.threadid
|
|
WHERE t.firstpostid <> p.postid
|
|
SQL
|
|
|
|
success_count = 0
|
|
fail_count = 0
|
|
|
|
attachment_regex = %r{\[attach[^\]]*\](\d+)\[/attach\]}i
|
|
|
|
Post.find_each do |post|
|
|
current_count += 1
|
|
print_status current_count, total_count
|
|
|
|
new_raw = post.raw.dup
|
|
new_raw.gsub!(attachment_regex) do |s|
|
|
matches = attachment_regex.match(s)
|
|
attachment_id = matches[1]
|
|
|
|
upload, filename = find_upload(post, attachment_id)
|
|
unless upload
|
|
fail_count += 1
|
|
next
|
|
end
|
|
|
|
html_for_upload(upload, filename)
|
|
end
|
|
|
|
if new_raw != post.raw
|
|
PostRevisor.new(post).revise!(
|
|
post.user,
|
|
{ raw: new_raw },
|
|
bypass_bump: true,
|
|
edit_reason: "Import attachments from modx",
|
|
)
|
|
end
|
|
|
|
success_count += 1
|
|
end
|
|
end
|
|
|
|
def not_close_topics
|
|
puts "", "Closing topics..."
|
|
|
|
# keep track of closed topics
|
|
closed_topic_ids = []
|
|
|
|
topics = mysql_query <<-SQL
|
|
SELECT t.threadid threadid, firstpostid, open
|
|
FROM #{TABLE_PREFIX}thread t
|
|
JOIN #{TABLE_PREFIX}post p ON p.postid = t.firstpostid
|
|
ORDER BY t.threadid
|
|
SQL
|
|
topics.each do |topic|
|
|
topic_id = "thread-#{topic["threadid"]}"
|
|
closed_topic_ids << topic_id if topic["open"] == 0
|
|
end
|
|
|
|
sql = <<-SQL
|
|
WITH closed_topic_ids AS (
|
|
SELECT t.id AS topic_id
|
|
FROM post_custom_fields pcf
|
|
JOIN posts p ON p.id = pcf.post_id
|
|
JOIN topics t ON t.id = p.topic_id
|
|
WHERE pcf.name = 'import_id'
|
|
AND pcf.value IN (?)
|
|
)
|
|
UPDATE topics
|
|
SET closed = true
|
|
WHERE id IN (SELECT topic_id FROM closed_topic_ids)
|
|
SQL
|
|
|
|
DB.exec(sql, closed_topic_ids)
|
|
end
|
|
|
|
def not_post_process_posts
|
|
puts "", "Postprocessing posts..."
|
|
|
|
current = 0
|
|
max = Post.count
|
|
|
|
Post.find_each do |post|
|
|
begin
|
|
new_raw = postprocess_post_raw(post.raw)
|
|
if new_raw != post.raw
|
|
post.raw = new_raw
|
|
post.save
|
|
end
|
|
rescue PrettyText::JavaScriptError
|
|
nil
|
|
ensure
|
|
print_status(current += 1, max)
|
|
end
|
|
end
|
|
end
|
|
|
|
def not_create_permalink_file
|
|
puts "", "Creating Permalink File...", ""
|
|
|
|
id_mapping = []
|
|
|
|
Topic.listable_topics.find_each do |topic|
|
|
pcf = topic.first_post.custom_fields
|
|
if pcf && pcf["import_id"]
|
|
id = pcf["import_id"].split("-").last
|
|
id_mapping.push("XXX#{id} YYY#{topic.id}")
|
|
end
|
|
end
|
|
|
|
# Category.find_each do |cat|
|
|
# ccf = cat.custom_fields
|
|
# if ccf && ccf["import_id"]
|
|
# id = ccf["import_id"].to_i
|
|
# id_mapping.push("/forumdisplay.php?#{id} http://forum.quartertothree.com#{cat.url}")
|
|
# end
|
|
# end
|
|
|
|
CSV.open(File.expand_path("../vb_map.csv", __FILE__), "w") do |csv|
|
|
id_mapping.each { |value| csv << [value] }
|
|
end
|
|
end
|
|
|
|
def deactivate_all_users
|
|
User.where("id > 0 and admin != true").update_all(active: true)
|
|
end
|
|
|
|
def suspend_users
|
|
puts "", "updating blocked users"
|
|
|
|
banned = 0
|
|
failed = 0
|
|
total =
|
|
mysql_query(
|
|
"SELECT count(*) count FROM #{TABLE_PREFIX}user_attributes where blocked != 0",
|
|
).first[
|
|
"count"
|
|
]
|
|
|
|
system_user = Discourse.system_user
|
|
|
|
mysql_query(
|
|
"SELECT id, blockedafter, blockeduntil FROM #{TABLE_PREFIX}user_attributes",
|
|
).each do |b|
|
|
user = User.find_by_id(user_id_from_imported_user_id(b["id"]))
|
|
if user
|
|
user.suspended_at = parse_timestamp(user["blockedafter"])
|
|
user.suspended_till = parse_timestamp(user["blockeduntil"])
|
|
if user.save
|
|
StaffActionLogger.new(system_user).log_user_suspend(user, "banned during initial import")
|
|
banned += 1
|
|
else
|
|
puts "Failed to suspend user #{user.username}. #{user.errors.try(:full_messages).try(:inspect)}"
|
|
failed += 1
|
|
end
|
|
else
|
|
puts "Not found: #{b["userid"]}"
|
|
failed += 1
|
|
end
|
|
|
|
print_status banned + failed, total
|
|
end
|
|
end
|
|
|
|
def parse_timestamp(timestamp)
|
|
Time.zone.at(@tz.utc_to_local(timestamp))
|
|
end
|
|
|
|
def mysql_query(sql)
|
|
@client.query(sql, cache_rows: true)
|
|
end
|
|
end
|
|
|
|
ImportScripts::Modx.new.perform
|