mirror of
https://github.com/discourse/discourse.git
synced 2024-11-23 20:54:31 +08:00
0c52537f10
We like to stay as close as possible to latest with rubocop cause the cops get better. This update required some code changes, specifically the default is to avoid explicit returns where implicit is done Also this renames a few rules
962 lines
36 KiB
Ruby
962 lines
36 KiB
Ruby
# encoding: utf-8
|
||
# frozen_string_literal: true
|
||
|
||
#
|
||
# Author: Erick Guan <fantasticfears@gmail.com>
|
||
#
|
||
# This script imports data from the latest Discuz! X
|
||
# It should work with any Discuz! X3.x release
|
||
# This script is tested only on Simplified Chinese Discuz! X instances
|
||
# If you want to import data other than Simplified Chinese, email me.
|
||
|
||
require 'php_serialize'
|
||
require 'miro'
|
||
require 'mysql2'
|
||
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
||
|
||
class ImportScripts::DiscuzX < ImportScripts::Base
|
||
|
||
# Name of the MySQL database the importer connects to.
DISCUZX_DB = "ultrax"
# Prefix prepended to every Discuz! X table name (see #table_name).
DB_TABLE_PREFIX = 'pre_'
# Number of rows requested per paginated query.
BATCH_SIZE = 1000
ORIGINAL_SITE_PREFIX = "oldsite.example.com/forums" # without http(s)://
NEW_SITE_PREFIX = "http://discourse.example.com" # with http:// or https://

# Set DISCUZX_BASE_DIR to the base directory of your discuz installation.
DISCUZX_BASE_DIR = '/var/www/discuz/upload'
# The two directories below are joined onto DISCUZX_BASE_DIR.
AVATAR_DIR = '/uc_server/data/avatar'
ATTACHMENT_DIR = '/data/attachment/forum'
# Joined with "|" and written to SiteSetting.authorized_extensions by
# #import_attachments. Frozen so the shared constant cannot be mutated.
AUTHORIZED_EXTENSIONS = %w[jpg jpeg png gif zip rar pdf].freeze
|
||
|
||
# Opens the MySQL connection to the Discuz! X database and precompiles the
# patterns that recognise links pointing back into the old forum.
#
# Each pattern exposes some of the named groups +tid+, +pid+, +fid+ and
# +action+; #process_discuzx_post hands them to #replace_internal_link.
def initialize
  super

  @client = Mysql2::Client.new(
    host: "localhost",
    username: "root",
    #password: "password",
    database: DISCUZX_DB
  )
  @first_post_id_by_topic_id = {}

  # Regexp.escape keeps the dots of the host name from matching any character.
  site = "https?://#{Regexp.escape(ORIGINAL_SITE_PREFIX)}"
  # NOTE(review): the source carried the redundant alternation `(?:&|&)`; the
  # second branch is assumed to have been the HTML-escaped `&amp;` -- confirm.
  amp = "(?:&|&amp;)"
  # Remainder of the URL: anything up to whitespace or a BBCode bracket.
  tail = "[^\\[\\]\\s]*"

  @internal_url_regexps = [
    /#{site}\/forum\.php\?mod=viewthread#{amp}tid=(?<tid>\d+)#{tail}(?:pid=?(?<pid>\d+))?#{tail}/,
    /#{site}\/viewthread\.php\?tid=(?<tid>\d+)#{tail}(?:pid=?(?<pid>\d+))?#{tail}/,
    /#{site}\/forum\.php\?mod=redirect#{amp}goto=findpost#{amp}pid=(?<pid>\d+)#{amp}ptid=(?<tid>\d+)#{tail}/,
    /#{site}\/redirect\.php\?goto=findpost#{amp}pid=(?<pid>\d+)#{amp}ptid=(?<tid>\d+)#{tail}/,
    /#{site}\/forumdisplay\.php\?fid=(?<fid>\d+)#{tail}/,
    /#{site}\/forum\.php\?mod=forumdisplay#{amp}fid=(?<fid>\d+)#{tail}/,
    /#{site}\/(?<action>index)\.php#{tail}/,
    /#{site}\/(?<action>stats)\.php#{tail}/,
    # The group is named +action+ (it used to be +mod+) so the 'stat' and
    # 'ranklist' branches of #replace_internal_link can actually be reached.
    /#{site}\/misc\.php\?mod=(?<action>stat|ranklist)#{tail}/
  ]
end
|
||
|
||
# Runs the import steps in order: duplicated-email bookkeeping, users,
# categories, posts, private messages and finally attachments.
#
# NOTE(review): #import_bookmarks is defined in this file but is not part of
# this sequence -- confirm whether that is intentional.
def execute
  get_knowledge_about_duplicated_email
  import_users
  import_categories
  import_posts
  import_private_messages
  import_attachments
end
|
||
|
||
# Adds the Discuz! X table prefix to a bare table name.
#
# @param name [String, nil] table name without the prefix
# @return [String] the prefixed table name; only the prefix when +name+ is nil
#   (the declared default used to raise TypeError on String + nil)
def table_name(name = nil)
  "#{DB_TABLE_PREFIX}#{name}"
end
|
||
|
||
# Finds which Discuz! user groups should be mapped to Discourse staff roles.
#
# Reads every row of the usergroup table and fills +@admin_group_id+ and
# +@moderator_group_id+ with the ids of the matching groups.
def get_knowledge_about_group
  group_table = table_name 'common_usergroup'
  result = mysql_query(
    "SELECT groupid group_id, radminid role_id
       FROM #{group_table};")
  @moderator_group_id = []
  @admin_group_id = []
  #@banned_group_id = [4,5] # Banned users and their posts are not imported; empty this array if you do want to import them.

  result.each do |group|
    case group['role_id']
    when 1 # administrators
      @admin_group_id << group['group_id']
    when 2, 3 # super moderators and moderators. Remove 3 if ordinary moderators should not become Discourse moderators.
      @moderator_group_id << group['group_id']
    end
  end
end
|
||
|
||
# Loads the forum slug mapping stored under the 'forumkeys' setting.
#
# The setting value is PHP-serialized; every non-blank entry is copied into
# +@category_slug+ keyed by the imported forum id. +@category_slug+ stays
# empty when the setting is missing or blank.
def get_knowledge_about_category_slug
  @category_slug = {}
  results = mysql_query("SELECT svalue value
                           FROM #{table_name 'common_setting'}
                          WHERE skey = 'forumkeys'")

  return if results.size < 1
  value = results.first['value']

  return if value.blank?

  PHP.unserialize(value).each do |category_import_id, slug|
    next if slug.blank?
    @category_slug[category_import_id] = slug
  end
end
|
||
|
||
# Records which Discuz! accounts share an e-mail address with another account.
#
# For every row returned by the query, +@duplicated_email+ maps +uid+ to the
# +import_id+ of the account it duplicates. When that account is already
# present in the importer's user lookup table, the duplicate uid is pointed
# at the same Discourse user id.
def get_knowledge_about_duplicated_email
  @duplicated_email = {}
  # Built through table_name so a customised DB_TABLE_PREFIX is honoured
  # (the table name used to be hard-coded as pre_common_member).
  member_table = table_name 'common_member'
  results = mysql_query(
    "select a.uid uid, b.uid import_id from #{member_table} a
     join (select uid, email from #{member_table} group by email having count(email) > 1 order by uid asc) b USING(email)
     where a.uid != b.uid")

  # Reaches into the lookup helper's internal table of imported users.
  users = @lookup.instance_variable_get :@users

  results.each do |row|
    @duplicated_email[row['uid']] = row['import_id']
    user_id = users[row['import_id']]
    users[row['uid']] = user_id if user_id
  end
end
|
||
|
||
# Imports every Discuz! member as a Discourse user, in batches of BATCH_SIZE.
#
# Account, profile, status and forum/home field tables are joined per user.
# After a user is created, the avatar and profile/card backgrounds are
# uploaded and a few preferences are adjusted.
def import_users
  puts '', "creating users"

  get_knowledge_about_group

  sensitive_user_table = table_name 'ucenter_members'
  user_table = table_name 'common_member'
  profile_table = table_name 'common_member_profile'
  status_table = table_name 'common_member_status'
  forum_table = table_name 'common_member_field_forum'
  home_table = table_name 'common_member_field_home'
  total_count = mysql_query("SELECT count(*) count FROM #{user_table};").first['count']

  batches(BATCH_SIZE) do |offset|
    # birthprovince used to be aliased from p.resideprovince, so the birth
    # location fallback always repeated the residence province.
    results = mysql_query(
      "SELECT u.uid id, u.username username, u.email email, u.groupid group_id,
              su.regdate regdate, su.password password_hash, su.salt salt,
              s.regip regip, s.lastip last_visit_ip, s.lastvisit last_visit_time, s.lastpost last_posted_at, s.lastsendmail last_emailed_at,
              u.emailstatus email_confirmed, u.avatarstatus avatar_exists,
              p.site website, p.address address, p.bio bio, p.realname realname, p.qq qq,
              p.resideprovince resideprovince, p.residecity residecity, p.residedist residedist, p.residecommunity residecommunity,
              p.birthprovince birthprovince, p.birthcity birthcity, p.birthdist birthdist, p.birthcommunity birthcommunity,
              h.spacecss spacecss, h.spacenote spacenote,
              f.customstatus customstatus, f.sightml sightml
         FROM #{user_table} u
         LEFT JOIN #{sensitive_user_table} su USING(uid)
         LEFT JOIN #{profile_table} p USING(uid)
         LEFT JOIN #{status_table} s USING(uid)
         LEFT JOIN #{forum_table} f USING(uid)
         LEFT JOIN #{home_table} h USING(uid)
        ORDER BY u.uid ASC
        LIMIT #{BATCH_SIZE}
       OFFSET #{offset};")

    break if results.size < 1

    # TODO: breaks the script reported by some users
    # next if all_records_exist? :users, users.map {|u| u["id"].to_i}

    create_users(results, total: total_count, offset: offset) do |user|
      {
        id: user['id'],
        email: user['email'],
        username: user['username'],
        name: first_exists(user['realname'], user['customstatus'], user['username']),
        import_pass: user['password_hash'],
        active: true,
        salt: user['salt'],
        # TODO: title: user['customstatus'], # move custom title to name since discourse can't let user custom title https://meta.discourse.org/t/let-users-custom-their-title/37626
        created_at: user['regdate'] ? Time.zone.at(user['regdate']) : nil,
        registration_ip_address: user['regip'],
        ip_address: user['last_visit_ip'],
        last_seen_at: user['last_visit_time'],
        last_emailed_at: user['last_emailed_at'],
        last_posted_at: user['last_posted_at'],
        moderator: @moderator_group_id.include?(user['group_id']),
        admin: @admin_group_id.include?(user['group_id']),
        website: discuzx_user_website(user),
        bio_raw: first_exists((user['bio'] && CGI.unescapeHTML(user['bio'])), user['sightml'], user['spacenote']).strip[0, 3000],
        location: discuzx_user_location(user),
        post_create_action: lambda do |newmember|
          import_discuzx_avatar(user, newmember)
          import_discuzx_profile_backgrounds(user, newmember)

          # we don't send email to the unconfirmed user
          newmember.update(email_digests: user['email_confirmed'] == 1) if newmember.email_digests
          newmember.update(name: '') if !newmember.name.blank? && newmember.name == newmember.username
        end
      }
    end
  end
end

# Chooses the website for a user row: the profile site when it contains a
# dot, otherwise a Qzone URL built from a purely numeric QQ number above
# 10000. Returns nil when neither applies.
def discuzx_user_website(user)
  site = user['website']
  return site.strip if site && site.include?('.')

  # The old check compared the QQ string with its Integer value, which is
  # never equal, so the Qzone fallback could not trigger.
  qq = user['qq'].to_s.strip
  "http://user.qzone.qq.com/#{qq}" if qq =~ /\A\d+\z/ && qq.to_i > 10000
end

# Builds the location: the explicit address when present, otherwise the
# residence parts joined by spaces, or the birth parts when no residence
# province is set.
def discuzx_user_location(user)
  prefix = user['resideprovince'].blank? ? 'birth' : 'reside'
  parts = %w[province city dist community].map { |part| user["#{prefix}#{part}"] }
  first_exists(user['address'], parts.reject { |location| location.blank? }.join(' '))
end

# Uploads the Discuz! avatar for a freshly created user, if the source row
# says one exists and the user has no avatar yet.
def import_discuzx_avatar(user, newmember)
  return unless user['avatar_exists'] == 1 && newmember.uploaded_avatar_id.blank?

  path, filename = discuzx_avatar_fullpath(user['id'])
  return unless path

  begin
    upload = create_upload(newmember.id, path, filename)
    if !upload.nil? && upload.persisted?
      # import_mode is switched off only around create_user_avatar.
      newmember.import_mode = false
      newmember.create_user_avatar
      newmember.import_mode = true
      newmember.user_avatar.update(custom_upload_id: upload.id)
      newmember.update(uploaded_avatar_id: upload.id)
    else
      puts "Error: Upload did not persist!"
    end
  rescue SystemCallError => err
    puts "Could not import avatar: #{err.message}"
  end
end

# Extracts background image URLs from the user's space CSS and uploads them
# as profile and card backgrounds. Does nothing when the CSS is blank or a
# profile background is already set.
def import_discuzx_profile_backgrounds(user, newmember)
  css = user['spacecss']
  return if css.blank? || !newmember.user_profile.profile_background_upload.blank?

  # One pattern per CSS selector; the class selector's dot is escaped so it
  # no longer matches an arbitrary character.
  patterns = {
    body: /body\s*\{[^}]*url\('?(.+?)'?\)/i,
    header: /#hd\s*\{[^}]*url\('?(.+?)'?\)/i,
    blocktitle: /\.blocktitle\s*\{[^}]*url\('?(.+?)'?\)/i,
    content: /#ct\s*\{[^}]*url\('?(.+?)'?\)/i
  }
  found = {}
  patterns.each do |key, pattern|
    matched = css.match(pattern)
    # Keep only the part after the old site prefix (a path below the base dir).
    found[key] = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last if matched
  end
  return if found.values.none?

  profile_background = first_exists(found[:header], found[:body], found[:content], found[:blocktitle])
  card_background = first_exists(found[:content], found[:body], found[:header], found[:blocktitle])

  upload = create_upload(newmember.id, File.join(DISCUZX_BASE_DIR, profile_background), File.basename(profile_background))
  if upload
    newmember.user_profile.upload_profile_background upload
  else
    puts "WARNING: #{user['username']} (UID: #{user['id']}) profile_background file did not persist!"
  end

  upload = create_upload(newmember.id, File.join(DISCUZX_BASE_DIR, card_background), File.basename(card_background))
  if upload
    newmember.user_profile.upload_card_background upload
  else
    puts "WARNING: #{user['username']} (UID: #{user['id']}) card_background file did not persist!"
  end
end
|
||
|
||
# Imports Discuz! forums as Discourse categories.
#
# Rows of type 'group' or with status 2 are skipped. Positions are offset by
# the highest existing category position. After creation each category gets
# its slug, its rules as the description post, an optional logo, and is
# added to the default muted categories when its status is 0 or 3.
def import_categories
  puts '', "creating categories"

  get_knowledge_about_category_slug

  forums_table = table_name 'forum_forum'
  forums_data_table = table_name 'forum_forumfield'

  results = mysql_query("
    SELECT f.fid id, f.fup parent_id, f.name, f.type type, f.status status, f.displayorder position,
           d.description description, d.rules rules, d.icon, d.extra extra
      FROM #{forums_table} f
      LEFT JOIN #{forums_data_table} d USING(fid)
     ORDER BY parent_id ASC, id ASC
  ")

  # Computed once; tolerates an empty table and nil positions (the previous
  # max_by call crashed on both and was pointlessly repeated for every row).
  max_position = Category.all.map(&:position).compact.max.to_i

  create_categories(results) do |row|
    next if row['type'] == 'group' || row['status'] == 2 # or row['status'].to_i == 3 # uncomment if you do not want to import groups

    extra = PHP.unserialize(row['extra']) if !row['extra'].blank?
    color = extra["namecolor"][1, 6] if extra && !extra["namecolor"].blank?

    h = {
      id: row['id'],
      name: row['name'],
      description: row['description'],
      position: row['position'].to_i + max_position,
      color: color,
      post_create_action: lambda do |category|
        if slug = @category_slug[row['id']]
          category.update(slug: slug)
        end

        # rules comes from a LEFT JOIN and may be NULL.
        raw = process_discuzx_post(row['rules'].to_s, nil)
        if @bbcode_to_md
          raw = raw.bbcode_to_md(false) rescue raw
        end
        category.topic.posts.first.update_attribute(:raw, raw)

        # icon may be NULL as well, hence blank? instead of empty?.
        if !row['icon'].blank?
          upload = create_upload(Discourse::SYSTEM_USER_ID, File.join(DISCUZX_BASE_DIR, ATTACHMENT_DIR, '../common', row['icon']), File.basename(row['icon']))
          if upload
            category.uploaded_logo_id = upload.id
            # FIXME: I don't know how to get '/shared' by script. May change to Rails.root
            category.color = Miro::DominantColors.new(File.join('/shared', upload.url)).to_hex.first[1, 6] if !color
            category.save!
          end
        end

        if row['status'] == 0 || row['status'] == 3
          SiteSetting.default_categories_muted = [SiteSetting.default_categories_muted, category.id].reject(&:blank?).join("|")
        end
        category
      end
    }
    if row['parent_id'].to_i > 0
      h[:parent_category_id] = category_id_from_imported_category_id(row['parent_id'])
    end
    h
  end
end
|
||
|
||
# Imports forum posts, in batches of BATCH_SIZE, as Discourse topics and
# replies.
#
# A row whose id equals the thread's first post id becomes a topic (with a
# poll prepended when the thread is flagged special == 1); every other row
# becomes a reply in the already imported topic. Posts flagged invisible or
# with an empty body are deleted right after creation; posts awaiting
# approval are flagged to the system user.
def import_posts
  puts "", "creating topics and posts"

  posts_table = table_name 'forum_post'
  topics_table = table_name 'forum_thread'

  total_count = mysql_query("SELECT count(*) count FROM #{posts_table}").first['count']

  batches(BATCH_SIZE) do |offset|
    # NOTE(review): `t.tid < 10000` silently excludes every thread with a
    # higher id while total_count still counts all posts -- confirm whether
    # this filter is intentional.
    results = mysql_query("
      SELECT p.pid id,
             p.tid topic_id,
             t.fid category_id,
             t.subject title,
             p.authorid user_id,
             p.message raw,
             p.dateline post_time,
             p2.pid first_id,
             p.invisible status,
             t.special special
        FROM #{posts_table} p
        JOIN #{posts_table} p2 ON p2.first AND p2.tid = p.tid
        JOIN #{topics_table} t ON t.tid = p.tid
       where t.tid < 10000
       ORDER BY id ASC, topic_id ASC
       LIMIT #{BATCH_SIZE}
      OFFSET #{offset};
    ")
    # u.status != -1 AND u.groupid != 4 AND u.groupid != 5 would select users who are not locked, banned from visiting or muted. On real Discuz forums banned users are usually spam bots or expelled users, which do not need to be imported.
    break if results.size < 1

    next if all_records_exist? :posts, results.map { |p| p["id"].to_i }

    create_posts(results, total: total_count, offset: offset) do |m|
      skip = false
      mapped = {}

      mapped[:id] = m['id']
      mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
      mapped[:raw] = process_discuzx_post(m['raw'], m['id'])
      mapped[:created_at] = Time.zone.at(m['post_time'])
      # The query above selects no tags column, so this is always nil.
      mapped[:tags] = m['tags']

      if m['id'] == m['first_id']
        mapped[:category] = category_id_from_imported_category_id(m['category_id'])
        mapped[:title] = CGI.unescapeHTML(m['title'])

        if m['special'] == 1
          poll_markup = discuzx_poll_markup(m['topic_id'])
          mapped[:raw].prepend(poll_markup) if poll_markup
        end
      else
        parent = topic_lookup_from_imported_post_id(m['first_id'])

        if parent
          mapped[:topic_id] = parent[:topic_id]
          reply_post_import_id = find_post_id_by_quote_number(m['raw'])
          if reply_post_import_id
            post_id = post_id_from_imported_post_id(reply_post_import_id.to_i)
            if (post = Post.find_by(id: post_id))
              if post.topic_id == mapped[:topic_id]
                mapped[:reply_to_post_number] = post.post_number
              else
                puts "post #{m['id']} reply to another topic, skip reply"
              end
            else
              puts "post #{m['id']} reply to not exists post #{reply_post_import_id}, skip reply"
            end
          end
        else
          puts "Parent topic #{m['topic_id']} doesn't exist. Skipping #{m['id']}: #{m['title'][0..40]}"
          skip = true
        end
      end

      # The invisible column is read as a bit field: bit 0 means the post
      # must be deleted, bit 1 means it is waiting for approval.
      if m['status'] & 1 == 1 || mapped[:raw].blank?
        mapped[:post_create_action] = lambda do |action_post|
          PostDestroyer.new(Discourse.system_user, action_post).perform_delete
        end
      elsif (m['status'] & 2) >> 1 == 1 # waiting for approve
        mapped[:post_create_action] = lambda do |action_post|
          PostActionCreator.notify_user(Discourse.system_user, action_post)
        end
      end
      skip ? nil : mapped
    end
  end
end

# Builds the "[poll]...[/poll]" markup for a topic from the poll tables.
# Returns nil (after printing a warning) when the topic has no poll options.
def discuzx_poll_markup(topic_id)
  poll = mysql_query("
    SELECT multiple, maxchoices
      FROM #{table_name 'forum_poll'}
     WHERE tid = #{topic_id}").first || {}
  option_rows = mysql_query("
    SELECT polloption
      FROM #{table_name 'forum_polloption'}
     WHERE tid = #{topic_id}
     ORDER BY displayorder")

  # size is what the rest of this file uses on query results; the result
  # object is not an Array, so empty? is avoided.
  if option_rows.size < 1
    puts "WARNING: can't find poll options for topic #{topic_id}, skip poll"
    return nil
  end

  # 0 is truthy in Ruby, so the flag has to be compared explicitly.
  type = [1, true].include?(poll['multiple']) ? ' type=multiple' : ''
  # to_i keeps a missing poll row (maxchoices == nil) from raising.
  max_choices = poll['maxchoices'].to_i
  max = max_choices > 0 ? " max=#{max_choices}" : ''
  options = option_rows.map { |option| '- ' + option['polloption'] }.join("\n")

  "[poll#{type}#{max}]\n#{options}\n[/poll]\n"
end
|
||
|
||
# Imports thread favourites as bookmarks on the first post of each thread,
# in batches of BATCH_SIZE.
def import_bookmarks
  puts '', 'creating bookmarks'
  favorites_table = table_name 'home_favorite'
  posts_table = table_name 'forum_post'

  total_count = mysql_query("SELECT count(*) count FROM #{favorites_table} WHERE idtype = 'tid'").first['count']
  batches(BATCH_SIZE) do |offset|
    # ORDER BY makes LIMIT/OFFSET paging deterministic; without it MySQL may
    # return rows in a different order for each batch.
    results = mysql_query("
      SELECT p.pid post_id, f.uid user_id
        FROM #{favorites_table} f
        JOIN #{posts_table} p ON f.id = p.tid
       WHERE f.idtype = 'tid' AND p.first
       ORDER BY p.pid ASC, f.uid ASC
       LIMIT #{BATCH_SIZE}
      OFFSET #{offset};")

    break if results.size < 1

    # next if all_records_exist?

    create_bookmarks(results, total: total_count, offset: offset) do |row|
      {
        user_id: row['user_id'],
        post_id: row['post_id']
      }
    end
  end
end
|
||
|
||
# Imports UCenter private messages as Discourse private-message topics, in
# batches of BATCH_SIZE.
#
# The first message of a thread (see #is_first_pm) creates the topic, using
# the subject from the lists table and the thread members as targets; later
# messages are attached to that topic through +@first_post_id_by_topic_id+.
def import_private_messages
  puts '', 'creating private messages'

  pm_indexes = table_name 'ucenter_pm_indexes'
  pm_messages = table_name 'ucenter_pm_messages'
  total_count = mysql_query("SELECT count(*) count FROM #{pm_indexes}").first['count']

  # Messages are read from the tables suffixed _1 to _9.
  # NOTE(review): only suffixes 1..9 are read; verify whether the source
  # installation also has a `_0` table.
  all_messages = (1..9).map do |shard|
    "SELECT pmid id, plid thread_id, authorid user_id, message, dateline created_at
       FROM #{pm_messages}_#{shard}"
  end.join("\n UNION ")

  batches(BATCH_SIZE) do |offset|
    results = mysql_query("
      #{all_messages}
      ORDER BY thread_id ASC, id ASC
      LIMIT #{BATCH_SIZE}
      OFFSET #{offset};")

    break if results.size < 1

    # next if all_records_exist? :posts, results.map {|m| "pm:#{m['id']}"}

    create_posts(results, total: total_count, offset: offset) do |m|
      skip = false
      mapped = {}

      mapped[:id] = "pm:#{m['id']}"
      mapped[:user_id] = user_id_from_imported_user_id(m['user_id']) || -1
      mapped[:raw] = process_discuzx_post(m['message'], m['id'])
      mapped[:created_at] = Time.zone.at(m['created_at'])
      thread_id = "pm_#{m['thread_id']}"

      if is_first_pm(m['id'], m['thread_id'])
        # find the title from list table
        pm_thread = mysql_query("
          SELECT plid thread_id, subject
            FROM #{table_name 'ucenter_pm_lists'}
           WHERE plid = #{m['thread_id']};").first
        mapped[:title] = pm_thread && pm_thread['subject']
        mapped[:archetype] = Archetype.private_message

        # Find the users who are part of this private message.
        import_user_ids = mysql_query("
          SELECT plid thread_id, uid user_id
            FROM #{table_name 'ucenter_pm_members'}
           WHERE plid = #{m['thread_id']};
        ").map { |r| r['user_id'] }.uniq

        # Everybody in the thread except the author becomes a recipient.
        mapped[:target_usernames] = import_user_ids.map do |import_user_id|
          next if import_user_id.to_s == m['user_id'].to_s
          User.find_by(id: user_id_from_imported_user_id(import_user_id)).try(:username)
        end.compact

        if pm_thread.nil?
          # A thread without a lists row used to raise NoMethodError on nil.
          skip = true
          puts "Skipping pm:#{m['id']} due to missing thread #{m['thread_id']}"
        elsif mapped[:target_usernames].empty? # pm with yourself?
          skip = true
          puts "Skipping pm:#{m['id']} due to no target"
        else
          @first_post_id_by_topic_id[thread_id] = mapped[:id]
        end
      else
        parent = topic_lookup_from_imported_post_id(@first_post_id_by_topic_id[thread_id])
        if parent
          mapped[:topic_id] = parent[:topic_id]
        else
          puts "Parent post pm thread:#{thread_id} doesn't exist. Skipping #{m["id"]}: #{m["message"][0..40]}"
          skip = true
        end
      end

      skip ? nil : mapped
    end
  end
end
|
||
|
||
# Tells whether a message is the first one of its private-message thread.
#
# @param pm_id [Integer, String] imported message id
# @param thread_id [Integer, String] imported thread (plid) id
# @return [Boolean] true when the lowest pmid indexed for the thread equals
#   +pm_id+; false when the thread has no index rows at all
def is_first_pm(pm_id, thread_id)
  result = mysql_query("
    SELECT pmid id
      FROM #{table_name 'ucenter_pm_indexes'}
     WHERE plid = #{thread_id}
     ORDER BY id
     LIMIT 1")
  first = result.first
  # A thread without index rows used to raise NoMethodError on nil.
  !first.nil? && first['id'].to_s == pm_id.to_s
end
|
||
|
||
# Replaces every "[img]...[/img]" payload with the HTML of an uploaded copy.
#
# @param raw [String] post body
# @return [String] a new string; a tag whose upload fails is removed. The
#   text is returned unchanged when no tag is present (the gsub! result used
#   to leak out as nil in that case).
def process_and_upload_inline_images(raw)
  inline_image_regex = /\[img\]([\s\S]*?)\[\/img\]/

  raw.gsub(inline_image_regex) do
    upload, filename = upload_inline_image(Regexp.last_match(1))
    upload ? html_for_upload(upload, filename) : ''
  end
end
|
||
|
||
# Converts a Discuz! post body into text suitable for Discourse.
#
# Strips reply headers, rewrites quotes, turns images hosted on the old site
# into "[x-attach]" placeholders (resolved later by #import_attachments),
# drops presentational BBCode and rewrites links into the old forum.
#
# @param raw [String, nil] original post body; nil is treated as empty
# @param import_id [Object] imported post id, used only in log messages
# @return [String] the converted, stripped body
def process_discuzx_post(raw, import_id)
  # raw = process_and_upload_inline_images(raw)
  s = raw.to_s.dup
  # Escaped so the dots of the host name only match literal dots.
  site = Regexp.escape(ORIGINAL_SITE_PREFIX)

  # Strip the quote
  # [quote] quotation includes the topic which is the same as reply to in Discourse
  # We get the pid to find the post number the post reply to. So it can be stripped
  s = s.gsub(/\[b\]回复 \[url=forum\.php\?mod=redirect&goto=findpost&pid=\d+&ptid=\d+\].* 的帖子\[\/url\]\[\/b\]/i, '').strip
  s = s.gsub(/\[b\]回复 \[url=https?:\/\/#{site}\/redirect\.php\?goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\].*?\[\/b\]/i, '').strip

  # Non-greedy, so two quotes in one post are handled separately.
  s.gsub!(/\[quote\](.*?)\[\/quote\]/im) do |matched|
    content = Regexp.last_match(1)
    post_import_id = find_post_id_by_quote_number(content)
    # `matched` is the matched String; it is returned whole to leave the
    # quote untouched (matched[0] used to return only the leading "[").
    next matched unless post_import_id

    post_id = post_id_from_imported_post_id(post_import_id.to_i)
    if (post = Post.find_by(id: post_id))
      "[quote=\"#{post.user.username}\", post: #{post.post_number}, topic: #{post.topic_id}]\n#{content}\n[/quote]"
    else
      puts "post #{import_id} quote to not exists post #{post_import_id}, skip reply"
      matched
    end
  end

  s.gsub!(/\[size=2\]\[color=#999999\].*? 发表于 [\d\-\: ]*\[\/color\] \[url=forum\.php\?mod=redirect&goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\]\[\/size\]/i, '')
  s.gsub!(/\[size=2\]\[color=#999999\].*? 发表于 [\d\-\: ]*\[\/color\] \[url=https?:\/\/#{site}\/redirect\.php\?goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\]\[\/size\]/i, '')

  # convert quote
  s.gsub!(/\[quote\](.*?)\[\/quote\]/m) { "\n" + Regexp.last_match(1).strip.gsub(/^/, '> ') + "\n" }

  # truncate line space, preventing line starting with many blanks to be parsed as code blocks
  s.gsub!(/^ {4,}/, ' ')

  # TODO: Much better to use bbcode-to-md gem
  # Convert image bbcode with width and height
  # Captures are non-greedy so several images on one line stay separate.
  s.gsub!(/\[img[^\]]*\]https?:\/\/#{site}\/(.*?)\[\/img\]/i, '[x-attach]\1[/x-attach]') # dont convert attachment
  s.gsub!(/<img[^>]*src="https?:\/\/#{site}\/([^"]*)".*?>/i, '[x-attach]\1[/x-attach]') # dont convert attachment
  s.gsub!(/\[img[^\]]*\]https?:\/\/www\.touhou\.cc\/blog\/(.*?)\[\/img\]/i, '[x-attach]../blog/\1[/x-attach]') # site-specific rule of the original author
  s.gsub!(/\[img[^\]]*\]https?:\/\/www\.touhou\.cc\/ucenter\/avatar\.php\?uid=(\d+)[^\]]*\[\/img\]/i) { "[x-attach]#{discuzx_avatar_fullpath(Regexp.last_match(1), false)[0]}[/x-attach]" } # site-specific rule of the original author
  s.gsub!(/\[img=(\d+),(\d+)\]([^\]]*)\[\/img\]/i, '<img width="\1" height="\2" src="\3">')
  s.gsub!(/\[img\]([^\]]*)\[\/img\]/i, '<img src="\1">')

  s.gsub!(/\[qq\]([^\]]*)\[\/qq\]/i, '<a href="http://wpa.qq.com/msgrd?V=3&Uin=\1&Site=[Discuz!]&from=discuz&Menu=yes" target="_blank"><!--<img src="static/image/common/qq_big.gif" border="0">-->QQ 交谈</a>')

  s.gsub!(/\[email\]([^\]]*)\[\/email\]/i, '[url=mailto:\1]\1[/url]') # bbcode-to-md can convert it
  s.gsub!(/\[s\]([^\]]*)\[\/s\]/i, '<s>\1</s>')
  s.gsub!(/\[sup\]([^\]]*)\[\/sup\]/i, '<sup>\1</sup>')
  s.gsub!(/\[sub\]([^\]]*)\[\/sub\]/i, '<sub>\1</sub>')
  s.gsub!(/\[hr\]/i, "\n---\n")

  # remove the media tag
  %w[media flash audio video].each { |tag| s.gsub!(/\[\/?#{tag}[^\]]*\]/i, "\n") }

  # Remove the font, p, backcolor, size and color tags
  # Discourse doesn't support the font tag
  %w[font p backcolor size color].each do |tag|
    s.gsub!(/\[#{tag}=[^\]]*?\]/i, '')
    s.gsub!(/\[\/#{tag}\]/i, '')
  end

  # Remove the hide tag
  s.gsub!(/\[\/?hide\]/i, '')
  s.gsub!(/\[\/?free[^\]]*\]/i, "\n")

  # Remove the align and float tags
  %w[align float].each do |tag|
    s.gsub!(/\[#{tag}=[^\]]*?\]/i, "\n")
    s.gsub!(/\[\/#{tag}\]/i, "\n")
  end

  # Convert code
  s.gsub!(/\[\/?code\]/i, "\n```\n")

  # The edit notice should be removed
  # example: 本帖最后由 Helloworld 于 2015-1-28 22:05 编辑
  s.gsub!(/\[i=s\] 本帖最后由[\s\S]*?编辑 \[\/i\]/, '')

  # Convert the custom smileys to emojis
  # `{:cry:}` to `:cry:`
  s.gsub!(/\{(\:\S*?\:)\}/, '\1')

  # convert list tags to ul and list=1 tags to ol
  # (basically, we're only missing list=a here...)
  s.gsub!(/\[list\](.*?)\[\/list:u\]/m, '[ul]\1[/ul]')
  s.gsub!(/\[list=1\](.*?)\[\/list:o\]/m, '[ol]\1[/ol]')
  # convert *-tags to li-tags so bbcode-to-md can do its magic on lists:
  s.gsub!(/\[\*\](.*?)\[\/\*:m\]/, '[li]\1[/li]')

  # Discuz can create PM out of a post, which will generates like
  # [url=http://example.com/forum.php?mod=redirect&goto=findpost&pid=111&ptid=11][b]关于您在“主题名称”的帖子[/b][/url]
  s.gsub!(pm_url_regexp) do |discuzx_link|
    # The pattern only captures the topic id; the remaining arguments are
    # passed explicitly (the call used to supply 2 of 5 required arguments).
    replace_internal_link(discuzx_link, Regexp.last_match(1).to_i, nil, nil, nil)
  end

  # [url][b]text[/b][/url] to **[url]text[/url]**
  s.gsub!(/(\[url=[^\[\]]*?\])\[b\](\S*)\[\/b\](\[\/url\])/, '**\1\2\3**')

  # Reads a named group only when the pattern defines it, instead of
  # rescuing the IndexError raised for unknown group names.
  capture = lambda do |match, name|
    match.names.include?(name) ? match[name] : nil
  end

  # Replace internal forum links
  @internal_url_regexps.each do |internal_url_regexp|
    s.gsub!(internal_url_regexp) do |discuzx_link|
      match = Regexp.last_match
      tid, pid, fid = %w[tid pid fid].map { |name| capture.call(match, name)&.to_i }
      # The stat/ranklist pattern may expose its value as `mod`.
      action = capture.call(match, 'action') || capture.call(match, 'mod')
      replace_internal_link(discuzx_link, tid, pid, fid, action)
    end
  end

  # @someone without the url
  s.gsub!(/@\[url=[^\[\]]*?\](\S*)\[\/url\]/i, '@\1')

  s.scan(/https?:\/\/#{site}\/[^\[\]\s]*/) { |link| puts "WARNING: post #{import_id} can't replace internal url #{link}" }

  s.strip
end
|
||
|
||
# Translates a link into the old forum to its Discourse equivalent.
#
# Candidates are tried in order: the post, the topic, the category, then the
# well-known action pages. The original link is returned when nothing
# resolves.
#
# @param discuzx_link [String] the link as found in the post
# @param import_topic_id [Integer, nil] Discuz! thread id
# @param import_post_id [Integer, nil] Discuz! post id
# @param import_category_id [Integer, nil] Discuz! forum id
# @param action [String, nil] 'index', 'stat', 'stats' or 'ranklist'
# @return [String] the replacement URL, or +discuzx_link+
def replace_internal_link(discuzx_link, import_topic_id, import_post_id = nil, import_category_id = nil, action = nil)
  if import_post_id
    post_id = post_id_from_imported_post_id import_post_id
    # find_by returns nil for a missing record; find would raise and made
    # the nil check below unreachable.
    post = post_id && Post.find_by(id: post_id)
    return post.full_url if post
  end

  if import_topic_id
    results = mysql_query("SELECT pid
                             FROM #{table_name 'forum_post'}
                            WHERE tid = #{import_topic_id.to_i} AND first
                            LIMIT 1")

    return discuzx_link unless results.size > 0

    lookup = topic_lookup_from_imported_post_id(results.first['pid'])
    return lookup ? "#{NEW_SITE_PREFIX}#{lookup[:url]}" : discuzx_link
  end

  if import_category_id
    category_id = category_id_from_imported_category_id import_category_id
    category = category_id && Category.find_by(id: category_id)
    return category.url if category
  end

  case action
  when 'index'
    "#{NEW_SITE_PREFIX}/"
  when 'stat', 'stats', 'ranklist'
    "#{NEW_SITE_PREFIX}/users"
  else
    discuzx_link
  end
end
|
||
|
||
# Memoized pattern for "find post" redirect links into the old forum.
#
# @return [Regexp] pattern whose first capture group is the +ptid+ value
def pm_url_regexp
  @pm_url_regexp ||= %r{https?://#{Regexp.escape(ORIGINAL_SITE_PREFIX)}/forum\.php\?mod=redirect&goto=findpost&pid=\d+&ptid=(\d+)}
end
|
||
|
||
# This step is done separately because it can take multiple attempts to get right (because of
# missing files, wrong paths, authorized extensions, etc.).
#
# For every post whose import id is a plain integer: "[attach]ID[/attach]"
# and "[x-attach]path[/x-attach]" placeholders are replaced with upload
# HTML, attachments of the post that were not referenced inline are appended
# to the body, and the post is revised when its body changed.
def import_attachments
  setting = AUTHORIZED_EXTENSIONS.join('|')
  SiteSetting.authorized_extensions = setting if setting != SiteSetting.authorized_extensions

  attachment_regex = /\[attach\](\d+)\[\/attach\]/
  # Non-greedy so several placeholders on one line are handled one by one.
  attachment_link_regex = /\[x-attach\](.+?)\[\/x-attach\]/

  current_count = 0
  total_count = mysql_query("SELECT count(*) count FROM #{table_name 'forum_post'};").first['count']

  success_count = 0
  fail_count = 0

  puts '', "Importing attachments...", ''

  Post.find_each do |post|
    # Skips posts whose import id is not a plain integer (e.g. "pm:12").
    next unless post.custom_fields['import_id'] == post.custom_fields['import_id'].to_i.to_s

    user = post.user

    current_count += 1
    print_status current_count, total_count

    new_raw = post.raw.dup

    inline_attachments = []

    new_raw.gsub!(attachment_regex) do |placeholder|
      attachment_id = Regexp.last_match(1).to_i
      inline_attachments.push attachment_id

      upload, filename = find_upload(user, post, attachment_id)
      if upload
        html_for_upload(upload, filename)
      else
        fail_count += 1
        # Keep the placeholder so a later run can retry it; it used to be
        # replaced with an empty string.
        placeholder
      end
    end

    new_raw.gsub!(attachment_link_regex) do |placeholder|
      attachment_file = Regexp.last_match(1)

      filename = File.basename(attachment_file)
      upload = create_upload(user.id, File.join(DISCUZX_BASE_DIR, attachment_file), filename)
      if upload
        html_for_upload(upload, filename)
      else
        fail_count += 1
        placeholder
      end
    end

    sql = "SELECT aid
             FROM #{table_name 'forum_attachment'}
            WHERE pid = #{post.custom_fields['import_id']}"
    if !inline_attachments.empty?
      sql = "#{sql} AND aid NOT IN (#{inline_attachments.join(',')})"
    end

    results = mysql_query(sql)

    results.each do |attachment|
      attachment_id = attachment['aid']
      upload, filename = find_upload(user, post, attachment_id)
      unless upload
        fail_count += 1
        next
      end
      html = html_for_upload(upload, filename)
      unless new_raw.include? html
        new_raw << "\n"
        new_raw << html
      end
    end

    if new_raw != post.raw
      PostRevisor.new(post).revise!(post.user, { raw: new_raw }, bypass_bump: true, edit_reason: '从 Discuz 中导入附件')
    end

    success_count += 1
  end

  puts '', ''
  puts "succeeded: #{success_count}"
  puts " failed: #{fail_count}" if fail_count > 0
  puts ''
end
|
||
|
||
# Create the full path to the discuz avatar specified from user id
#
# The id is left-padded with zeros to nine digits and split into directory
# parts of three, two and two digits; the last two digits prefix the file
# name.
#
# @param user_id [Integer, String] Discuz! uid
# @param absolute [Boolean] prepend DISCUZX_BASE_DIR when true
# @return [Array(String, String)] the avatar path and its file name
def discuzx_avatar_fullpath(user_id, absolute = true)
  padded_id = user_id.to_s.rjust(9, '0')

  directories = [padded_id[0..2], padded_id[3..4], padded_id[5..6]]
  file_name = "#{padded_id[-2..-1]}_avatar_big.jpg"

  roots = absolute ? [DISCUZX_BASE_DIR, AVATAR_DIR] : [AVATAR_DIR]
  [File.join(*roots, *directories, file_name), file_name]
end
|
||
|
||
# post id is in the quote block
#
# @param raw [String, nil] post body or quote content
# @return [String, nil] the digits of the referenced post id, or nil when no
#   known reference format is found
def find_post_id_by_quote_number(raw)
  # Escaped so the dots of the host name only match literal dots.
  site = Regexp.escape(ORIGINAL_SITE_PREFIX)

  case raw
  when /\[url=forum\.php\?mod=redirect&goto=findpost&pid=(\d+)&ptid=\d+\]/ # standard
    Regexp.last_match(1)
  when /\[url=https?:\/\/#{site}\/redirect\.php\?goto=findpost&pid=(\d+)&ptid=\d+\]/ # old discuz 7 format
    Regexp.last_match(1)
  when /\[quote\][\S\s]*pid=(\d+)[\S\s]*\[\/quote\]/ # quote
    Regexp.last_match(1)
  end
end
|
||
|
||
# for some reason, discuz inlined some png file
# the corresponding image stored is broken in a way
#
# Decodes a "data:image/png;base64," payload into a temporary file and
# uploads it as the system user under a random file name.
#
# @param data [String, nil] content of an [img] tag
# @return [Array(Upload, String), nil] the upload and its file name, or nil
#   when +data+ is missing, is not a PNG data URI, or the upload is invalid
def upload_inline_image(data)
  return unless data

  puts 'Creating inline image'

  prefix = 'data:image/png;base64,'
  # Only slice off the prefix when it is really there; arbitrary text used
  # to be sliced blindly and decoded as if it were base64.
  unless data.start_with?(prefix)
    puts 'Error parsed inline photo', data[0..20]
    return
  end
  raw_file = Base64.decode64(data[prefix.length..-1])

  real_filename = "#{SecureRandom.hex}.png"
  filename = Tempfile.new(['inline', '.png'])
  begin
    filename.binmode
    filename.write(raw_file)
    filename.rewind

    upload = create_upload(Discourse::SYSTEM_USER_ID, filename, real_filename)
  ensure
    # Best-effort cleanup of the temporary file.
    filename.close rescue nil
    filename.unlink rescue nil
  end

  if upload.nil? || !upload.valid?
    puts "Upload not valid :("
    puts upload.errors.inspect if upload
    return nil
  end

  [upload, real_filename]
end
|
||
|
||
# find the uploaded file and real name from the db
#
# Looks up the attachment's table id in the index table, reads its row from
# the table suffixed with that id, then uploads the file from the attachment
# directory.
#
# @param user [User] owner of the upload
# @param post [Post] imported post; its 'import_id' custom field is the pid
# @param upload_id [Integer] Discuz! attachment id (aid)
# @return [Array(Upload, String), nil] the upload and the original file name,
#   or nil when a record or the file is missing, the upload is invalid, or a
#   MySQL error occurs
def find_upload(user, post, upload_id)
  attachment_table = table_name 'forum_attachment'
  # search for table id
  sql = "SELECT a.pid post_id,
                a.aid upload_id,
                a.tableid table_id
           FROM #{attachment_table} a
          WHERE a.pid = #{post.custom_fields['import_id']}
            AND a.aid = #{upload_id};"
  results = mysql_query(sql)

  unless (meta_data = results.first)
    puts "Couldn't find forum_attachment record meta data for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}"
    return nil
  end

  # search for uploaded file meta data
  sql = "SELECT a.pid post_id,
                a.aid upload_id,
                a.tid topic_id,
                a.uid user_id,
                a.dateline uploaded_time,
                a.filename real_filename,
                a.attachment attachment_path,
                a.remote is_remote,
                a.description description,
                a.isimage is_image,
                a.thumb is_thumb
           FROM #{attachment_table}_#{meta_data['table_id']} a
          WHERE a.aid = #{upload_id};"
  results = mysql_query(sql)

  unless (row = results.first)
    puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{post.custom_fields['import_id']}"
    return nil
  end

  filename = File.join(DISCUZX_BASE_DIR, ATTACHMENT_DIR, row['attachment_path'])
  # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
  unless File.exist?(filename)
    puts "Attachment file doesn't exist: #{filename}"
    return nil
  end

  real_filename = row['real_filename']
  # A name that starts with a dot gets a random stem in front of it.
  real_filename = "#{SecureRandom.hex}#{real_filename}" if real_filename[0] == '.'
  upload = create_upload(user.id, filename, real_filename)

  if upload.nil? || !upload.valid?
    puts "Upload not valid :("
    puts upload.errors.inspect if upload
    return nil
  end

  [upload, real_filename]
rescue Mysql2::Error => e
  puts "SQL Error"
  puts e.message
  puts sql
  nil
end
|
||
|
||
# Returns the first argument that is not blank.
#
# @param items [Array<Object>] candidates, in order of preference
# @return [Object, String] the first non-blank candidate, or '' when every
#   candidate is blank
def first_exists(*items)
  items.find { |item| !item.blank? } || ''
end
|
||
|
||
# Runs a SQL statement on the Discuz! X connection.
#
# @param sql [String] statement to execute
# @return [Object] whatever +@client.query+ returns; the query is issued
#   with +cache_rows: false+
def mysql_query(sql)
  @client.query(sql, cache_rows: false)
end
|
||
end
|
||
|
||
# Build the importer and run it. #perform is not defined in this file;
# presumably it is provided by ImportScripts::Base -- verify in base.rb.
ImportScripts::DiscuzX.new.perform
|