mirror of
https://github.com/discourse/discourse.git
synced 2024-11-24 20:51:50 +08:00
1168 lines
40 KiB
Ruby
1168 lines
40 KiB
Ruby
# encoding: utf-8
|
||
# frozen_string_literal: true
|
||
|
||
#
|
||
# Author: Erick Guan <fantasticfears@gmail.com>
|
||
#
|
||
# This script import the data from latest Discuz! X
|
||
# Should work among Discuz! X3.x
|
||
# This script is tested only on Simplified Chinese Discuz! X instances
|
||
# If you want to import data other than Simplified Chinese, email me.
|
||
|
||
require "php_serialize"
|
||
require "miro"
|
||
require "mysql2"
|
||
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
||
|
||
class ImportScripts::DiscuzX < ImportScripts::Base
|
||
DISCUZX_DB = "ultrax"
|
||
DB_TABLE_PREFIX = "pre_"
|
||
BATCH_SIZE = 1000
|
||
ORIGINAL_SITE_PREFIX = "oldsite.example.com/forums" # without http(s)://
|
||
NEW_SITE_PREFIX = "http://discourse.example.com" # with http:// or https://
|
||
|
||
# Set DISCUZX_BASE_DIR to the base directory of your discuz installation.
|
||
DISCUZX_BASE_DIR = "/var/www/discuz/upload"
|
||
AVATAR_DIR = "/uc_server/data/avatar"
|
||
ATTACHMENT_DIR = "/data/attachment/forum"
|
||
AUTHORIZED_EXTENSIONS = %w[jpg jpeg png gif zip rar pdf]
|
||
|
||
def initialize
|
||
super
|
||
|
||
@client =
|
||
Mysql2::Client.new(
|
||
host: "localhost",
|
||
username: "root",
|
||
#password: "password",
|
||
database: DISCUZX_DB,
|
||
)
|
||
@first_post_id_by_topic_id = {}
|
||
|
||
@internal_url_regexps = [
|
||
%r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/forum\.php\?mod=viewthread(?:&|&)tid=(?<tid>\d+)(?:[^\[\]\s]*)(?:pid=?(?<pid>\d+))?(?:[^\[\]\s]*)},
|
||
%r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/viewthread\.php\?tid=(?<tid>\d+)(?:[^\[\]\s]*)(?:pid=?(?<pid>\d+))?(?:[^\[\]\s]*)},
|
||
%r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/forum\.php\?mod=redirect(?:&|&)goto=findpost(?:&|&)pid=(?<pid>\d+)(?:&|&)ptid=(?<tid>\d+)(?:[^\[\]\s]*)},
|
||
%r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/redirect\.php\?goto=findpost(?:&|&)pid=(?<pid>\d+)(?:&|&)ptid=(?<tid>\d+)(?:[^\[\]\s]*)},
|
||
%r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/forumdisplay\.php\?fid=(?<fid>\d+)(?:[^\[\]\s]*)},
|
||
%r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/forum\.php\?mod=forumdisplay(?:&|&)fid=(?<fid>\d+)(?:[^\[\]\s]*)},
|
||
%r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/(?<action>index)\.php(?:[^\[\]\s]*)},
|
||
%r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/(?<action>stats)\.php(?:[^\[\]\s]*)},
|
||
%r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/misc.php\?mod=(?<mod>stat|ranklist)(?:[^\[\]\s]*)},
|
||
]
|
||
end
|
||
|
||
def execute
|
||
get_knowledge_about_duplicated_email
|
||
import_users
|
||
import_categories
|
||
import_posts
|
||
import_private_messages
|
||
import_attachments
|
||
end
|
||
|
||
# add the prefix to the table name
|
||
def table_name(name = nil)
|
||
DB_TABLE_PREFIX + name
|
||
end
|
||
|
||
# find which group members can be granted as admin
|
||
def get_knowledge_about_group
|
||
group_table = table_name "common_usergroup"
|
||
result =
|
||
mysql_query(
|
||
"SELECT groupid group_id, radminid role_id
|
||
FROM #{group_table};",
|
||
)
|
||
@moderator_group_id = []
|
||
@admin_group_id = []
|
||
#@banned_group_id = [4,5] # 禁止的用户及其帖子均不导入,如果你想导入这些用户和帖子,请把这个数组清空。
|
||
|
||
result.each do |group|
|
||
case group["role_id"]
|
||
when 1 # 管理员
|
||
@admin_group_id << group["group_id"]
|
||
when 2,
|
||
3 # 超级版主、版主。如果你不希望原普通版主成为Discourse版主,把3去掉。
|
||
@moderator_group_id << group["group_id"]
|
||
end
|
||
end
|
||
end
|
||
|
||
def get_knowledge_about_category_slug
|
||
@category_slug = {}
|
||
results =
|
||
mysql_query(
|
||
"SELECT svalue value
|
||
FROM #{table_name "common_setting"}
|
||
WHERE skey = 'forumkeys'",
|
||
)
|
||
|
||
return if results.size < 1
|
||
value = results.first["value"]
|
||
|
||
return if value.blank?
|
||
|
||
PHP
|
||
.unserialize(value)
|
||
.each do |category_import_id, slug|
|
||
next if slug.blank?
|
||
@category_slug[category_import_id] = slug
|
||
end
|
||
end
|
||
|
||
def get_knowledge_about_duplicated_email
|
||
@duplicated_email = {}
|
||
results =
|
||
mysql_query(
|
||
"select a.uid uid, b.uid import_id from pre_common_member a
|
||
join (select uid, email from pre_common_member group by email having count(email) > 1 order by uid asc) b USING(email)
|
||
where a.uid != b.uid",
|
||
)
|
||
|
||
users = @lookup.instance_variable_get :@users
|
||
|
||
results.each do |row|
|
||
@duplicated_email[row["uid"]] = row["import_id"]
|
||
user_id = users[row["import_id"]]
|
||
users[row["uid"]] = user_id if user_id
|
||
end
|
||
end
|
||
|
||
def import_users
|
||
puts "", "creating users"
|
||
|
||
get_knowledge_about_group
|
||
|
||
sensitive_user_table = table_name "ucenter_members"
|
||
user_table = table_name "common_member"
|
||
profile_table = table_name "common_member_profile"
|
||
status_table = table_name "common_member_status"
|
||
forum_table = table_name "common_member_field_forum"
|
||
home_table = table_name "common_member_field_home"
|
||
total_count = mysql_query("SELECT count(*) count FROM #{user_table};").first["count"]
|
||
|
||
batches(BATCH_SIZE) do |offset|
|
||
results =
|
||
mysql_query(
|
||
"SELECT u.uid id, u.username username, u.email email, u.groupid group_id,
|
||
su.regdate regdate, su.password password_hash, su.salt salt,
|
||
s.regip regip, s.lastip last_visit_ip, s.lastvisit last_visit_time, s.lastpost last_posted_at, s.lastsendmail last_emailed_at,
|
||
u.emailstatus email_confirmed, u.avatarstatus avatar_exists,
|
||
p.site website, p.address address, p.bio bio, p.realname realname, p.qq qq,
|
||
p.resideprovince resideprovince, p.residecity residecity, p.residedist residedist, p.residecommunity residecommunity,
|
||
p.resideprovince birthprovince, p.birthcity birthcity, p.birthdist birthdist, p.birthcommunity birthcommunity,
|
||
h.spacecss spacecss, h.spacenote spacenote,
|
||
f.customstatus customstatus, f.sightml sightml
|
||
FROM #{user_table} u
|
||
LEFT JOIN #{sensitive_user_table} su USING(uid)
|
||
LEFT JOIN #{profile_table} p USING(uid)
|
||
LEFT JOIN #{status_table} s USING(uid)
|
||
LEFT JOIN #{forum_table} f USING(uid)
|
||
LEFT JOIN #{home_table} h USING(uid)
|
||
ORDER BY u.uid ASC
|
||
LIMIT #{BATCH_SIZE}
|
||
OFFSET #{offset};",
|
||
)
|
||
|
||
break if results.size < 1
|
||
|
||
# TODO: breaks the script reported by some users
|
||
# next if all_records_exist? :users, users.map {|u| u["id"].to_i}
|
||
|
||
create_users(results, total: total_count, offset: offset) do |user|
|
||
{
|
||
id: user["id"],
|
||
email: user["email"],
|
||
username: user["username"],
|
||
name: first_exists(user["realname"], user["customstatus"], user["username"]),
|
||
import_pass: user["password_hash"],
|
||
active: true,
|
||
salt: user["salt"],
|
||
# TODO: title: user['customstatus'], # move custom title to name since discourse can't let user custom title https://meta.discourse.org/t/let-users-custom-their-title/37626
|
||
created_at: user["regdate"] ? Time.zone.at(user["regdate"]) : nil,
|
||
registration_ip_address: user["regip"],
|
||
ip_address: user["last_visit_ip"],
|
||
last_seen_at: user["last_visit_time"],
|
||
last_emailed_at: user["last_emailed_at"],
|
||
last_posted_at: user["last_posted_at"],
|
||
moderator: @moderator_group_id.include?(user["group_id"]),
|
||
admin: @admin_group_id.include?(user["group_id"]),
|
||
website:
|
||
(user["website"] && user["website"].include?(".")) ?
|
||
user["website"].strip :
|
||
if (
|
||
user["qq"] && user["qq"].strip == (user["qq"].strip.to_i) &&
|
||
user["qq"].strip.to_i > (10_000)
|
||
)
|
||
"http://user.qzone.qq.com/" + user["qq"].strip
|
||
else
|
||
nil
|
||
end,
|
||
bio_raw:
|
||
first_exists(
|
||
(user["bio"] && CGI.unescapeHTML(user["bio"])),
|
||
user["sightml"],
|
||
user["spacenote"],
|
||
).strip[
|
||
0,
|
||
3000
|
||
],
|
||
location:
|
||
first_exists(
|
||
user["address"],
|
||
(
|
||
if !user["resideprovince"].blank?
|
||
[
|
||
user["resideprovince"],
|
||
user["residecity"],
|
||
user["residedist"],
|
||
user["residecommunity"],
|
||
]
|
||
else
|
||
[
|
||
user["birthprovince"],
|
||
user["birthcity"],
|
||
user["birthdist"],
|
||
user["birthcommunity"],
|
||
]
|
||
end
|
||
).reject { |location| location.blank? }.join(" "),
|
||
),
|
||
post_create_action:
|
||
lambda do |newmember|
|
||
if user["avatar_exists"] == (1) && newmember.uploaded_avatar_id.blank?
|
||
path, filename = discuzx_avatar_fullpath(user["id"])
|
||
if path
|
||
begin
|
||
upload = create_upload(newmember.id, path, filename)
|
||
if !upload.nil? && upload.persisted?
|
||
newmember.import_mode = false
|
||
newmember.create_user_avatar
|
||
newmember.import_mode = true
|
||
newmember.user_avatar.update(custom_upload_id: upload.id)
|
||
newmember.update(uploaded_avatar_id: upload.id)
|
||
else
|
||
puts "Error: Upload did not persist!"
|
||
end
|
||
rescue SystemCallError => err
|
||
puts "Could not import avatar: #{err.message}"
|
||
end
|
||
end
|
||
end
|
||
if !user["spacecss"].blank? && newmember.user_profile.profile_background_upload.blank?
|
||
# profile background
|
||
if matched = user["spacecss"].match(/body\s*{[^}]*url\('?(.+?)'?\)/i)
|
||
body_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last
|
||
end
|
||
if matched = user["spacecss"].match(/#hd\s*{[^}]*url\('?(.+?)'?\)/i)
|
||
header_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last
|
||
end
|
||
if matched = user["spacecss"].match(/.blocktitle\s*{[^}]*url\('?(.+?)'?\)/i)
|
||
blocktitle_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last
|
||
end
|
||
if matched = user["spacecss"].match(/#ct\s*{[^}]*url\('?(.+?)'?\)/i)
|
||
content_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last
|
||
end
|
||
|
||
if body_background || header_background || blocktitle_background ||
|
||
content_background
|
||
profile_background =
|
||
first_exists(
|
||
header_background,
|
||
body_background,
|
||
content_background,
|
||
blocktitle_background,
|
||
)
|
||
card_background =
|
||
first_exists(
|
||
content_background,
|
||
body_background,
|
||
header_background,
|
||
blocktitle_background,
|
||
)
|
||
upload =
|
||
create_upload(
|
||
newmember.id,
|
||
File.join(DISCUZX_BASE_DIR, profile_background),
|
||
File.basename(profile_background),
|
||
)
|
||
if upload
|
||
newmember.user_profile.upload_profile_background upload
|
||
else
|
||
puts "WARNING: #{user["username"]} (UID: #{user["id"]}) profile_background file did not persist!"
|
||
end
|
||
upload =
|
||
create_upload(
|
||
newmember.id,
|
||
File.join(DISCUZX_BASE_DIR, card_background),
|
||
File.basename(card_background),
|
||
)
|
||
if upload
|
||
newmember.user_profile.upload_card_background upload
|
||
else
|
||
puts "WARNING: #{user["username"]} (UID: #{user["id"]}) card_background file did not persist!"
|
||
end
|
||
end
|
||
end
|
||
|
||
# we don't send email to the unconfirmed user
|
||
if newmember.email_digests
|
||
newmember.update(email_digests: user["email_confirmed"] == 1)
|
||
end
|
||
if !newmember.name.blank? && newmember.name == (newmember.username)
|
||
newmember.update(name: "")
|
||
end
|
||
end,
|
||
}
|
||
end
|
||
end
|
||
end
|
||
|
||
def import_categories
|
||
puts "", "creating categories"
|
||
|
||
get_knowledge_about_category_slug
|
||
|
||
forums_table = table_name "forum_forum"
|
||
forums_data_table = table_name "forum_forumfield"
|
||
|
||
results =
|
||
mysql_query(
|
||
"
|
||
SELECT f.fid id, f.fup parent_id, f.name, f.type type, f.status status, f.displayorder position,
|
||
d.description description, d.rules rules, d.icon, d.extra extra
|
||
FROM #{forums_table} f
|
||
LEFT JOIN #{forums_data_table} d USING(fid)
|
||
ORDER BY parent_id ASC, id ASC
|
||
",
|
||
)
|
||
|
||
max_position = Category.all.max_by(&:position).position
|
||
create_categories(results) do |row|
|
||
next if row["type"] == ("group") || row["status"] == (2) # or row['status'].to_i == 3 # 如果不想导入群组,取消注释
|
||
extra = PHP.unserialize(row["extra"]) if !row["extra"].blank?
|
||
color = extra["namecolor"][1, 6] if extra && !extra["namecolor"].blank?
|
||
|
||
Category.all.max_by(&:position).position
|
||
|
||
h = {
|
||
id: row["id"],
|
||
name: row["name"],
|
||
description: row["description"],
|
||
position: row["position"].to_i + max_position,
|
||
color: color,
|
||
post_create_action:
|
||
lambda do |category|
|
||
if slug = @category_slug[row["id"]]
|
||
category.update(slug: slug)
|
||
end
|
||
|
||
raw = process_discuzx_post(row["rules"], nil)
|
||
if @bbcode_to_md
|
||
raw =
|
||
begin
|
||
raw.bbcode_to_md(false)
|
||
rescue StandardError
|
||
raw
|
||
end
|
||
end
|
||
category.topic.posts.first.update_attribute(:raw, raw)
|
||
if !row["icon"].empty?
|
||
upload =
|
||
create_upload(
|
||
Discourse::SYSTEM_USER_ID,
|
||
File.join(DISCUZX_BASE_DIR, ATTACHMENT_DIR, "../common", row["icon"]),
|
||
File.basename(row["icon"]),
|
||
)
|
||
if upload
|
||
category.uploaded_logo_id = upload.id
|
||
# FIXME: I don't know how to get '/shared' by script. May change to Rails.root
|
||
category.color =
|
||
Miro::DominantColors.new(File.join("/shared", upload.url)).to_hex.first[
|
||
1,
|
||
6
|
||
] if !color
|
||
category.save!
|
||
end
|
||
end
|
||
|
||
if row["status"] == (0) || row["status"] == (3)
|
||
SiteSetting.default_categories_muted = [
|
||
SiteSetting.default_categories_muted,
|
||
category.id,
|
||
].reject(&:blank?).join("|")
|
||
end
|
||
category
|
||
end,
|
||
}
|
||
if row["parent_id"].to_i > 0
|
||
h[:parent_category_id] = category_id_from_imported_category_id(row["parent_id"])
|
||
end
|
||
h
|
||
end
|
||
end
|
||
|
||
def import_posts
|
||
puts "", "creating topics and posts"
|
||
|
||
users_table = table_name "common_member"
|
||
posts_table = table_name "forum_post"
|
||
topics_table = table_name "forum_thread"
|
||
|
||
total_count = mysql_query("SELECT count(*) count FROM #{posts_table}").first["count"]
|
||
|
||
batches(BATCH_SIZE) do |offset|
|
||
results =
|
||
mysql_query(
|
||
"
|
||
SELECT p.pid id,
|
||
p.tid topic_id,
|
||
t.fid category_id,
|
||
t.subject title,
|
||
p.authorid user_id,
|
||
p.message raw,
|
||
p.dateline post_time,
|
||
p2.pid first_id,
|
||
p.invisible status,
|
||
t.special special
|
||
FROM #{posts_table} p
|
||
JOIN #{posts_table} p2 ON p2.first AND p2.tid = p.tid
|
||
JOIN #{topics_table} t ON t.tid = p.tid
|
||
ORDER BY id ASC, topic_id ASC
|
||
LIMIT #{BATCH_SIZE}
|
||
OFFSET #{offset};
|
||
",
|
||
)
|
||
# u.status != -1 AND u.groupid != 4 AND u.groupid != 5 用户未被锁定、禁访或禁言。在现实中的 Discuz 论坛,禁止的用户通常是广告机或驱逐的用户,这些不需要导入。
|
||
break if results.size < 1
|
||
|
||
next if all_records_exist? :posts, results.map { |p| p["id"].to_i }
|
||
|
||
create_posts(results, total: total_count, offset: offset) do |m|
|
||
skip = false
|
||
mapped = {}
|
||
|
||
mapped[:id] = m["id"]
|
||
mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1
|
||
mapped[:raw] = process_discuzx_post(m["raw"], m["id"])
|
||
mapped[:created_at] = Time.zone.at(m["post_time"])
|
||
mapped[:tags] = m["tags"]
|
||
|
||
if m["id"] == m["first_id"]
|
||
mapped[:category] = category_id_from_imported_category_id(m["category_id"])
|
||
mapped[:title] = CGI.unescapeHTML(m["title"])
|
||
|
||
if m["special"] == 1
|
||
results =
|
||
mysql_query(
|
||
"
|
||
SELECT multiple, maxchoices
|
||
FROM #{table_name "forum_poll"}
|
||
WHERE tid = #{m["topic_id"]}",
|
||
)
|
||
poll = results.first || {}
|
||
results =
|
||
mysql_query(
|
||
"
|
||
SELECT polloption
|
||
FROM #{table_name "forum_polloption"}
|
||
WHERE tid = #{m["topic_id"]}
|
||
ORDER BY displayorder",
|
||
)
|
||
if results.empty?
|
||
puts "WARNING: can't find poll options for topic #{m["topic_id"]}, skip poll"
|
||
else
|
||
mapped[
|
||
:raw
|
||
].prepend "[poll#{poll["multiple"] ? " type=multiple" : ""}#{poll["maxchoices"] > 0 ? " max=#{poll["maxchoices"]}" : ""}]\n#{results.map { |option| "- " + option["polloption"] }.join("\n")}\n[/poll]\n"
|
||
end
|
||
end
|
||
else
|
||
parent = topic_lookup_from_imported_post_id(m["first_id"])
|
||
|
||
if parent
|
||
mapped[:topic_id] = parent[:topic_id]
|
||
reply_post_import_id = find_post_id_by_quote_number(m["raw"])
|
||
if reply_post_import_id
|
||
post_id = post_id_from_imported_post_id(reply_post_import_id.to_i)
|
||
if (post = Post.find_by(id: post_id))
|
||
if post.topic_id == mapped[:topic_id]
|
||
mapped[:reply_to_post_number] = post.post_number
|
||
else
|
||
puts "post #{m["id"]} reply to another topic, skip reply"
|
||
end
|
||
else
|
||
puts "post #{m["id"]} reply to not exists post #{reply_post_import_id}, skip reply"
|
||
end
|
||
end
|
||
else
|
||
puts "Parent topic #{m["topic_id"]} doesn't exist. Skipping #{m["id"]}: #{m["title"][0..40]}"
|
||
skip = true
|
||
end
|
||
end
|
||
|
||
if m["status"] & 1 == 1 || mapped[:raw].blank?
|
||
mapped[:post_create_action] = lambda do |action_post|
|
||
PostDestroyer.new(Discourse.system_user, action_post).perform_delete
|
||
end
|
||
elsif (m["status"] & 2) >> 1 == 1 # waiting for approve
|
||
mapped[:post_create_action] = lambda do |action_post|
|
||
PostActionCreator.notify_user(Discourse.system_user, action_post)
|
||
end
|
||
end
|
||
skip ? nil : mapped
|
||
end
|
||
end
|
||
end
|
||
|
||
def import_bookmarks
|
||
puts "", "creating bookmarks"
|
||
favorites_table = table_name "home_favorite"
|
||
posts_table = table_name "forum_post"
|
||
|
||
total_count =
|
||
mysql_query("SELECT count(*) count FROM #{favorites_table} WHERE idtype = 'tid'").first[
|
||
"count"
|
||
]
|
||
batches(BATCH_SIZE) do |offset|
|
||
results =
|
||
mysql_query(
|
||
"
|
||
SELECT p.pid post_id, f.uid user_id
|
||
FROM #{favorites_table} f
|
||
JOIN #{posts_table} p ON f.id = p.tid
|
||
WHERE f.idtype = 'tid' AND p.first
|
||
LIMIT #{BATCH_SIZE}
|
||
OFFSET #{offset};",
|
||
)
|
||
|
||
break if results.size < 1
|
||
|
||
# next if all_records_exist?
|
||
|
||
create_bookmarks(results, total: total_count, offset: offset) do |row|
|
||
{ user_id: row["user_id"], post_id: row["post_id"] }
|
||
end
|
||
end
|
||
end
|
||
|
||
def import_private_messages
|
||
puts "", "creating private messages"
|
||
|
||
pm_indexes = table_name "ucenter_pm_indexes"
|
||
pm_messages = table_name "ucenter_pm_messages"
|
||
total_count = mysql_query("SELECT count(*) count FROM #{pm_indexes}").first["count"]
|
||
|
||
batches(BATCH_SIZE) do |offset|
|
||
results =
|
||
mysql_query(
|
||
"
|
||
SELECT pmid id, plid thread_id, authorid user_id, message, dateline created_at
|
||
FROM #{pm_messages}_1
|
||
UNION SELECT pmid id, plid thread_id, authorid user_id, message, dateline created_at
|
||
FROM #{pm_messages}_2
|
||
UNION SELECT pmid id, plid thread_id, authorid user_id, message, dateline created_at
|
||
FROM #{pm_messages}_3
|
||
UNION SELECT pmid id, plid thread_id, authorid user_id, message, dateline created_at
|
||
FROM #{pm_messages}_4
|
||
UNION SELECT pmid id, plid thread_id, authorid user_id, message, dateline created_at
|
||
FROM #{pm_messages}_5
|
||
UNION SELECT pmid id, plid thread_id, authorid user_id, message, dateline created_at
|
||
FROM #{pm_messages}_6
|
||
UNION SELECT pmid id, plid thread_id, authorid user_id, message, dateline created_at
|
||
FROM #{pm_messages}_7
|
||
UNION SELECT pmid id, plid thread_id, authorid user_id, message, dateline created_at
|
||
FROM #{pm_messages}_8
|
||
UNION SELECT pmid id, plid thread_id, authorid user_id, message, dateline created_at
|
||
FROM #{pm_messages}_9
|
||
ORDER BY thread_id ASC, id ASC
|
||
LIMIT #{BATCH_SIZE}
|
||
OFFSET #{offset};",
|
||
)
|
||
|
||
break if results.size < 1
|
||
|
||
# next if all_records_exist? :posts, results.map {|m| "pm:#{m['id']}"}
|
||
|
||
create_posts(results, total: total_count, offset: offset) do |m|
|
||
skip = false
|
||
mapped = {}
|
||
|
||
mapped[:id] = "pm:#{m["id"]}"
|
||
mapped[:user_id] = user_id_from_imported_user_id(m["user_id"]) || -1
|
||
mapped[:raw] = process_discuzx_post(m["message"], m["id"])
|
||
mapped[:created_at] = Time.zone.at(m["created_at"])
|
||
thread_id = "pm_#{m["thread_id"]}"
|
||
|
||
if is_first_pm(m["id"], m["thread_id"])
|
||
# find the title from list table
|
||
pm_thread =
|
||
mysql_query(
|
||
"
|
||
SELECT plid thread_id, subject
|
||
FROM #{table_name "ucenter_pm_lists"}
|
||
WHERE plid = #{m["thread_id"]};",
|
||
).first
|
||
mapped[:title] = pm_thread["subject"]
|
||
mapped[:archetype] = Archetype.private_message
|
||
|
||
# Find the users who are part of this private message.
|
||
import_user_ids =
|
||
mysql_query(
|
||
"
|
||
SELECT plid thread_id, uid user_id
|
||
FROM #{table_name "ucenter_pm_members"}
|
||
WHERE plid = #{m["thread_id"]};
|
||
",
|
||
).map { |r| r["user_id"] }.uniq
|
||
|
||
mapped[:target_usernames] = import_user_ids
|
||
.map! do |import_user_id|
|
||
if import_user_id.to_s == m["user_id"].to_s
|
||
nil
|
||
else
|
||
User.find_by(id: user_id_from_imported_user_id(import_user_id)).try(:username)
|
||
end
|
||
end
|
||
.compact
|
||
|
||
if mapped[:target_usernames].empty? # pm with yourself?
|
||
skip = true
|
||
puts "Skipping pm:#{m["id"]} due to no target"
|
||
else
|
||
@first_post_id_by_topic_id[thread_id] = mapped[:id]
|
||
end
|
||
else
|
||
parent = topic_lookup_from_imported_post_id(@first_post_id_by_topic_id[thread_id])
|
||
if parent
|
||
mapped[:topic_id] = parent[:topic_id]
|
||
else
|
||
puts "Parent post pm thread:#{thread_id} doesn't exist. Skipping #{m["id"]}: #{m["message"][0..40]}"
|
||
skip = true
|
||
end
|
||
end
|
||
|
||
skip ? nil : mapped
|
||
end
|
||
end
|
||
end
|
||
|
||
# search for first pm id for the series of pm
|
||
def is_first_pm(pm_id, thread_id)
|
||
result =
|
||
mysql_query(
|
||
"
|
||
SELECT pmid id
|
||
FROM #{table_name "ucenter_pm_indexes"}
|
||
WHERE plid = #{thread_id}
|
||
ORDER BY id",
|
||
)
|
||
result.first["id"].to_s == pm_id.to_s
|
||
end
|
||
|
||
def process_and_upload_inline_images(raw)
|
||
inline_image_regex = %r{\[img\]([\s\S]*?)\[/img\]}
|
||
|
||
s = raw.dup
|
||
|
||
s.gsub!(inline_image_regex) do |d|
|
||
matches = inline_image_regex.match(d)
|
||
data = matches[1]
|
||
|
||
upload, filename = upload_inline_image data
|
||
upload ? html_for_upload(upload, filename) : nil
|
||
end
|
||
end
|
||
|
||
def process_discuzx_post(raw, import_id)
|
||
# raw = process_and_upload_inline_images(raw)
|
||
s = raw.dup
|
||
|
||
# Strip the quote
|
||
# [quote] quotation includes the topic which is the same as reply to in Discourse
|
||
# We get the pid to find the post number the post reply to. So it can be stripped
|
||
s =
|
||
s.gsub(
|
||
%r{\[b\]回复 \[url=forum.php\?mod=redirect&goto=findpost&pid=\d+&ptid=\d+\].* 的帖子\[/url\]\[/b\]}i,
|
||
"",
|
||
).strip
|
||
s =
|
||
s.gsub(
|
||
%r{\[b\]回复 \[url=https?://#{ORIGINAL_SITE_PREFIX}/redirect.php\?goto=findpost&pid=\d+&ptid=\d+\].*?\[/url\].*?\[/b\]}i,
|
||
"",
|
||
).strip
|
||
|
||
s.gsub!(%r{\[quote\](.*)?\[/quote\]}im) do |matched|
|
||
content = $1
|
||
post_import_id = find_post_id_by_quote_number(content)
|
||
if post_import_id
|
||
post_id = post_id_from_imported_post_id(post_import_id.to_i)
|
||
if (post = Post.find_by(id: post_id))
|
||
"[quote=\"#{post.user.username}\", post: #{post.post_number}, topic: #{post.topic_id}]\n#{content}\n[/quote]"
|
||
else
|
||
puts "post #{import_id} quote to not exists post #{post_import_id}, skip reply"
|
||
matched[0]
|
||
end
|
||
else
|
||
matched[0]
|
||
end
|
||
end
|
||
|
||
s.gsub!(
|
||
%r{\[size=2\]\[color=#999999\].*? 发表于 [\d\-\: ]*\[/color\] \[url=forum.php\?mod=redirect&goto=findpost&pid=\d+&ptid=\d+\].*?\[/url\]\[/size\]}i,
|
||
"",
|
||
)
|
||
s.gsub!(
|
||
%r{\[size=2\]\[color=#999999\].*? 发表于 [\d\-\: ]*\[/color\] \[url=https?://#{ORIGINAL_SITE_PREFIX}/redirect.php\?goto=findpost&pid=\d+&ptid=\d+\].*?\[/url\]\[/size\]}i,
|
||
"",
|
||
)
|
||
|
||
# convert quote
|
||
s.gsub!(%r{\[quote\](.*?)\[/quote\]}m) { "\n" + ($1.strip).gsub(/^/, "> ") + "\n" }
|
||
|
||
# truncate line space, preventing line starting with many blanks to be parsed as code blocks
|
||
s.gsub!(/^ {4,}/, " ")
|
||
|
||
# TODO: Much better to use bbcode-to-md gem
|
||
# Convert image bbcode with width and height
|
||
s.gsub!(
|
||
%r{\[img[^\]]*\]https?://#{ORIGINAL_SITE_PREFIX}/(.*)\[/img\]}i,
|
||
'[x-attach]\1[/x-attach]',
|
||
) # dont convert attachment
|
||
s.gsub!(
|
||
%r{<img[^>]*src="https?://#{ORIGINAL_SITE_PREFIX}/(.*)".*?>}i,
|
||
'[x-attach]\1[/x-attach]',
|
||
) # dont convert attachment
|
||
s.gsub!(
|
||
%r{\[img[^\]]*\]https?://www\.touhou\.cc/blog/(.*)\[/img\]}i,
|
||
'[x-attach]../blog/\1[/x-attach]',
|
||
) # 私货
|
||
s.gsub!(
|
||
%r{\[img[^\]]*\]https?://www\.touhou\.cc/ucenter/avatar.php\?uid=(\d+)[^\]]*\[/img\]}i,
|
||
) { "[x-attach]#{discuzx_avatar_fullpath($1, false)[0]}[/x-attach]" } # 私货
|
||
s.gsub!(%r{\[img=(\d+),(\d+)\]([^\]]*)\[/img\]}i, '<img width="\1" height="\2" src="\3">')
|
||
s.gsub!(%r{\[img\]([^\]]*)\[/img\]}i, '<img src="\1">')
|
||
|
||
s.gsub!(
|
||
%r{\[qq\]([^\]]*)\[/qq\]}i,
|
||
'<a href="http://wpa.qq.com/msgrd?V=3&Uin=\1&Site=[Discuz!]&from=discuz&Menu=yes" target="_blank"><!--<img src="static/image/common/qq_big.gif" border="0">-->QQ 交谈</a>',
|
||
)
|
||
|
||
s.gsub!(%r{\[email\]([^\]]*)\[/email\]}i, '[url=mailto:\1]\1[/url]') # bbcode-to-md can convert it
|
||
s.gsub!(%r{\[s\]([^\]]*)\[/s\]}i, '<s>\1</s>')
|
||
s.gsub!(%r{\[sup\]([^\]]*)\[/sup\]}i, '<sup>\1</sup>')
|
||
s.gsub!(%r{\[sub\]([^\]]*)\[/sub\]}i, '<sub>\1</sub>')
|
||
s.gsub!(/\[hr\]/i, "\n---\n")
|
||
|
||
# remove the media tag
|
||
s.gsub!(%r{\[/?media[^\]]*\]}i, "\n")
|
||
s.gsub!(%r{\[/?flash[^\]]*\]}i, "\n")
|
||
s.gsub!(%r{\[/?audio[^\]]*\]}i, "\n")
|
||
s.gsub!(%r{\[/?video[^\]]*\]}i, "\n")
|
||
|
||
# Remove the font, p and backcolor tag
|
||
# Discourse doesn't support the font tag
|
||
s.gsub!(/\[font=[^\]]*?\]/i, "")
|
||
s.gsub!(%r{\[/font\]}i, "")
|
||
s.gsub!(/\[p=[^\]]*?\]/i, "")
|
||
s.gsub!(%r{\[/p\]}i, "")
|
||
s.gsub!(/\[backcolor=[^\]]*?\]/i, "")
|
||
s.gsub!(%r{\[/backcolor\]}i, "")
|
||
|
||
# Remove the size tag
|
||
# I really have no idea what is this
|
||
s.gsub!(/\[size=[^\]]*?\]/i, "")
|
||
s.gsub!(%r{\[/size\]}i, "")
|
||
|
||
# Remove the color tag
|
||
s.gsub!(/\[color=[^\]]*?\]/i, "")
|
||
s.gsub!(%r{\[/color\]}i, "")
|
||
|
||
# Remove the hide tag
|
||
s.gsub!(%r{\[/?hide\]}i, "")
|
||
s.gsub!(%r{\[/?free[^\]]*\]}i, "\n")
|
||
|
||
# Remove the align tag
|
||
# still don't know what it is
|
||
s.gsub!(/\[align=[^\]]*?\]/i, "\n")
|
||
s.gsub!(%r{\[/align\]}i, "\n")
|
||
s.gsub!(/\[float=[^\]]*?\]/i, "\n")
|
||
s.gsub!(%r{\[/float\]}i, "\n")
|
||
|
||
# Convert code
|
||
s.gsub!(%r{\[/?code\]}i, "\n```\n")
|
||
|
||
# The edit notice should be removed
|
||
# example: 本帖最后由 Helloworld 于 2015-1-28 22:05 编辑
|
||
s.gsub!(%r{\[i=s\] 本帖最后由[\s\S]*?编辑 \[/i\]}, "")
|
||
|
||
# Convert the custom smileys to emojis
|
||
# `{:cry:}` to `:cry`
|
||
s.gsub!(/\{(\:\S*?\:)\}/, '\1')
|
||
|
||
# Replace internal forum links that aren't in the <!-- l --> format
|
||
# convert list tags to ul and list=1 tags to ol
|
||
# (basically, we're only missing list=a here...)
|
||
s.gsub!(%r{\[list\](.*?)\[/list:u\]}m, '[ul]\1[/ul]')
|
||
s.gsub!(%r{\[list=1\](.*?)\[/list:o\]}m, '[ol]\1[/ol]')
|
||
# convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
|
||
s.gsub!(%r{\[\*\](.*?)\[/\*:m\]}, '[li]\1[/li]')
|
||
|
||
# Discuz can create PM out of a post, which will generates like
|
||
# [url=http://example.com/forum.php?mod=redirect&goto=findpost&pid=111&ptid=11][b]关于您在“主题名称”的帖子[/b][/url]
|
||
s.gsub!(pm_url_regexp) { |discuzx_link| replace_internal_link(discuzx_link, $1) }
|
||
|
||
# [url][b]text[/b][/url] to **[url]text[/url]**
|
||
s.gsub!(%r{(\[url=[^\[\]]*?\])\[b\](\S*)\[/b\](\[/url\])}, '**\1\2\3**')
|
||
|
||
@internal_url_regexps.each do |internal_url_regexp|
|
||
s.gsub!(internal_url_regexp) do |discuzx_link|
|
||
replace_internal_link(
|
||
discuzx_link,
|
||
(
|
||
begin
|
||
$~[:tid].to_i
|
||
rescue StandardError
|
||
nil
|
||
end
|
||
),
|
||
(
|
||
begin
|
||
$~[:pid].to_i
|
||
rescue StandardError
|
||
nil
|
||
end
|
||
),
|
||
(
|
||
begin
|
||
$~[:fid].to_i
|
||
rescue StandardError
|
||
nil
|
||
end
|
||
),
|
||
(
|
||
begin
|
||
$~[:action]
|
||
rescue StandardError
|
||
nil
|
||
end
|
||
),
|
||
)
|
||
end
|
||
end
|
||
|
||
# @someone without the url
|
||
s.gsub!(%r{@\[url=[^\[\]]*?\](\S*)\[/url\]}i, '@\1')
|
||
|
||
s.scan(%r{http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/[^\[\]\s]*}) do |link|
|
||
puts "WARNING: post #{import_id} can't replace internal url #{link}"
|
||
end
|
||
|
||
s.strip
|
||
end
|
||
|
||
def replace_internal_link(
|
||
discuzx_link,
|
||
import_topic_id,
|
||
import_post_id,
|
||
import_category_id,
|
||
action
|
||
)
|
||
if import_post_id
|
||
post_id = post_id_from_imported_post_id import_post_id
|
||
if post_id
|
||
post = Post.find post_id
|
||
return post.full_url if post
|
||
end
|
||
end
|
||
|
||
if import_topic_id
|
||
results =
|
||
mysql_query(
|
||
"SELECT pid
|
||
FROM #{table_name "forum_post"}
|
||
WHERE tid = #{import_topic_id} AND first
|
||
LIMIT 1",
|
||
)
|
||
|
||
return discuzx_link if results.size.zero?
|
||
|
||
linked_post_id = results.first["pid"]
|
||
lookup = topic_lookup_from_imported_post_id(linked_post_id)
|
||
|
||
if lookup
|
||
return "#{NEW_SITE_PREFIX}#{lookup[:url]}"
|
||
else
|
||
return discuzx_link
|
||
end
|
||
end
|
||
|
||
if import_category_id
|
||
category_id = category_id_from_imported_category_id import_category_id
|
||
if category_id
|
||
category = Category.find category_id
|
||
return category.url if category
|
||
end
|
||
end
|
||
|
||
case action
|
||
when "index"
|
||
return "#{NEW_SITE_PREFIX}/"
|
||
when "stat", "stats", "ranklist"
|
||
return "#{NEW_SITE_PREFIX}/users"
|
||
end
|
||
|
||
discuzx_link
|
||
end
|
||
|
||
def pm_url_regexp
|
||
@pm_url_regexp ||=
|
||
Regexp.new(
|
||
"http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub(".", '\.')}/forum\\.php\\?mod=redirect&goto=findpost&pid=\\d+&ptid=(\\d+)",
|
||
)
|
||
end
|
||
|
||
# This step is done separately because it can take multiple attempts to get right (because of
|
||
# missing files, wrong paths, authorized extensions, etc.).
|
||
def import_attachments
|
||
setting = AUTHORIZED_EXTENSIONS.join("|")
|
||
SiteSetting.authorized_extensions = setting if setting != SiteSetting.authorized_extensions
|
||
|
||
attachment_regex = %r{\[attach\](\d+)\[/attach\]}
|
||
attachment_link_regex = %r{\[x-attach\](.+)\[/x-attach\]}
|
||
|
||
current_count = 0
|
||
total_count =
|
||
mysql_query("SELECT count(*) count FROM #{table_name "forum_post"};").first["count"]
|
||
|
||
success_count = 0
|
||
fail_count = 0
|
||
|
||
puts "", "Importing attachments...", ""
|
||
|
||
Post.find_each do |post|
|
||
next unless post.custom_fields["import_id"] == post.custom_fields["import_id"].to_i.to_s
|
||
|
||
user = post.user
|
||
|
||
current_count += 1
|
||
print_status current_count, total_count
|
||
|
||
new_raw = post.raw.dup
|
||
|
||
inline_attachments = []
|
||
|
||
new_raw.gsub!(attachment_regex) do |s|
|
||
attachment_id = $1.to_i
|
||
inline_attachments.push attachment_id
|
||
|
||
upload, filename = find_upload(user, post, attachment_id)
|
||
unless upload
|
||
fail_count += 1
|
||
next
|
||
end
|
||
|
||
html_for_upload(upload, filename)
|
||
end
|
||
new_raw.gsub!(attachment_link_regex) do |s|
|
||
attachment_file = $1
|
||
|
||
filename = File.basename(attachment_file)
|
||
upload = create_upload(user.id, File.join(DISCUZX_BASE_DIR, attachment_file), filename)
|
||
unless upload
|
||
fail_count += 1
|
||
next
|
||
end
|
||
|
||
html_for_upload(upload, filename)
|
||
end
|
||
|
||
sql =
|
||
"SELECT aid
|
||
FROM #{table_name "forum_attachment"}
|
||
WHERE pid = #{post.custom_fields["import_id"]}"
|
||
sql = "#{sql} AND aid NOT IN (#{inline_attachments.join(",")})" if !inline_attachments.empty?
|
||
|
||
results = mysql_query(sql)
|
||
|
||
results.each do |attachment|
|
||
attachment_id = attachment["aid"]
|
||
upload, filename = find_upload(user, post, attachment_id)
|
||
unless upload
|
||
fail_count += 1
|
||
next
|
||
end
|
||
html = html_for_upload(upload, filename)
|
||
unless new_raw.include? html
|
||
new_raw << "\n"
|
||
new_raw << html
|
||
end
|
||
end
|
||
|
||
if new_raw != post.raw
|
||
PostRevisor.new(post).revise!(
|
||
post.user,
|
||
{ raw: new_raw },
|
||
bypass_bump: true,
|
||
edit_reason: "从 Discuz 中导入附件",
|
||
)
|
||
end
|
||
|
||
success_count += 1
|
||
end
|
||
|
||
puts "", ""
|
||
puts "succeeded: #{success_count}"
|
||
puts " failed: #{fail_count}" if fail_count > 0
|
||
puts ""
|
||
end
|
||
|
||
# Create the full path to the discuz avatar specified from user id
|
||
def discuzx_avatar_fullpath(user_id, absolute = true)
|
||
padded_id = user_id.to_s.rjust(9, "0")
|
||
|
||
part_1 = padded_id[0..2]
|
||
part_2 = padded_id[3..4]
|
||
part_3 = padded_id[5..6]
|
||
part_4 = padded_id[-2..-1]
|
||
file_name = "#{part_4}_avatar_big.jpg"
|
||
|
||
if absolute
|
||
[File.join(DISCUZX_BASE_DIR, AVATAR_DIR, part_1, part_2, part_3, file_name), file_name]
|
||
else
|
||
[File.join(AVATAR_DIR, part_1, part_2, part_3, file_name), file_name]
|
||
end
|
||
end
|
||
|
||
# post id is in the quote block
|
||
def find_post_id_by_quote_number(raw)
|
||
case raw
|
||
when /\[url=forum.php\?mod=redirect&goto=findpost&pid=(\d+)&ptid=\d+\]/ #standard
|
||
$1
|
||
when %r{\[url=https?://#{ORIGINAL_SITE_PREFIX}/redirect.php\?goto=findpost&pid=(\d+)&ptid=\d+\]} # old discuz 7 format
|
||
$1
|
||
when %r{\[quote\][\S\s]*pid=(\d+)[\S\s]*\[/quote\]} # quote
|
||
$1
|
||
end
|
||
end
|
||
|
||
# for some reason, discuz inlined some png file
|
||
# the corresponding image stored is broken in a way
|
||
def upload_inline_image(data)
|
||
return unless data
|
||
|
||
puts "Creating inline image"
|
||
|
||
encoded_photo = data["data:image/png;base64,".length..-1]
|
||
if encoded_photo
|
||
raw_file = Base64.decode64(encoded_photo)
|
||
else
|
||
puts "Error parsed inline photo", data[0..20]
|
||
return
|
||
end
|
||
|
||
real_filename = "#{SecureRandom.hex}.png"
|
||
filename = Tempfile.new(%w[inline .png])
|
||
begin
|
||
filename.binmode
|
||
filename.write(raw_file)
|
||
filename.rewind
|
||
|
||
upload = create_upload(Discourse::SYSTEM_USER_ID, filename, real_filename)
|
||
ensure
|
||
begin
|
||
filename.close
|
||
rescue StandardError
|
||
nil
|
||
end
|
||
begin
|
||
filename.unlink
|
||
rescue StandardError
|
||
nil
|
||
end
|
||
end
|
||
|
||
if upload.nil? || !upload.valid?
|
||
puts "Upload not valid :("
|
||
puts upload.errors.inspect if upload
|
||
return nil
|
||
end
|
||
|
||
[upload, real_filename]
|
||
end
|
||
|
||
# find the uploaded file and real name from the db
|
||
def find_upload(user, post, upload_id)
|
||
attachment_table = table_name "forum_attachment"
|
||
# search for table id
|
||
sql =
|
||
"SELECT a.pid post_id,
|
||
a.aid upload_id,
|
||
a.tableid table_id
|
||
FROM #{attachment_table} a
|
||
WHERE a.pid = #{post.custom_fields["import_id"]}
|
||
AND a.aid = #{upload_id};"
|
||
results = mysql_query(sql)
|
||
|
||
unless (meta_data = results.first)
|
||
puts "Couldn't find forum_attachment record meta data for post.id = #{post.id}, import_id = #{post.custom_fields["import_id"]}"
|
||
return nil
|
||
end
|
||
|
||
# search for uploaded file meta data
|
||
sql =
|
||
"SELECT a.pid post_id,
|
||
a.aid upload_id,
|
||
a.tid topic_id,
|
||
a.uid user_id,
|
||
a.dateline uploaded_time,
|
||
a.filename real_filename,
|
||
a.attachment attachment_path,
|
||
a.remote is_remote,
|
||
a.description description,
|
||
a.isimage is_image,
|
||
a.thumb is_thumb
|
||
FROM #{attachment_table}_#{meta_data["table_id"]} a
|
||
WHERE a.aid = #{upload_id};"
|
||
results = mysql_query(sql)
|
||
|
||
unless (row = results.first)
|
||
puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields["import_id"]}"
|
||
return nil
|
||
end
|
||
|
||
filename = File.join(DISCUZX_BASE_DIR, ATTACHMENT_DIR, row["attachment_path"])
|
||
unless File.exist?(filename)
|
||
puts "Attachment file doesn't exist: #{filename}"
|
||
return nil
|
||
end
|
||
real_filename = row["real_filename"]
|
||
real_filename.prepend SecureRandom.hex if real_filename[0] == "."
|
||
upload = create_upload(user.id, filename, real_filename)
|
||
|
||
if upload.nil? || !upload.valid?
|
||
puts "Upload not valid :("
|
||
puts upload.errors.inspect if upload
|
||
return nil
|
||
end
|
||
|
||
[upload, real_filename]
|
||
rescue Mysql2::Error => e
|
||
puts "SQL Error"
|
||
puts e.message
|
||
puts sql
|
||
nil
|
||
end
|
||
|
||
def first_exists(*items)
|
||
items.find { |item| !item.blank? } || ""
|
||
end
|
||
|
||
def mysql_query(sql)
|
||
@client.query(sql, cache_rows: false)
|
||
end
|
||
end
|
||
|
||
ImportScripts::DiscuzX.new.perform
|