mirror of
https://github.com/discourse/discourse.git
synced 2025-01-19 03:22:46 +08:00
Merge pull request #3978 from fantasticfears/discuz_x
Updating Discuz import script (Most work done by zh99998)
This commit is contained in:
commit
4b834253ef
|
@ -7,6 +7,8 @@
|
|||
# This script is tested only on Simplified Chinese Discuz! X instances
|
||||
# If you want to import data other than Simplified Chinese, email me.
|
||||
|
||||
require 'php_serialize'
|
||||
require 'miro'
|
||||
require 'mysql2'
|
||||
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
||||
|
||||
|
@ -34,9 +36,23 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
database: DISCUZX_DB
|
||||
)
|
||||
@first_post_id_by_topic_id = {}
|
||||
|
||||
@internal_url_regexps = [
|
||||
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/forum\.php\?mod=viewthread(?:&|&)tid=(?<tid>\d+)(?:[^\[\]\s]*)(?:pid=?(?<pid>\d+))?(?:[^\[\]\s]*)/,
|
||||
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/viewthread\.php\?tid=(?<tid>\d+)(?:[^\[\]\s]*)(?:pid=?(?<pid>\d+))?(?:[^\[\]\s]*)/,
|
||||
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/forum\.php\?mod=redirect(?:&|&)goto=findpost(?:&|&)pid=(?<pid>\d+)(?:&|&)ptid=(?<tid>\d+)(?:[^\[\]\s]*)/,
|
||||
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/redirect\.php\?goto=findpost(?:&|&)pid=(?<pid>\d+)(?:&|&)ptid=(?<tid>\d+)(?:[^\[\]\s]*)/,
|
||||
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/forumdisplay\.php\?fid=(?<fid>\d+)(?:[^\[\]\s]*)/,
|
||||
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/forum\.php\?mod=forumdisplay(?:&|&)fid=(?<fid>\d+)(?:[^\[\]\s]*)/,
|
||||
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/(?<action>index)\.php(?:[^\[\]\s]*)/,
|
||||
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/(?<action>stats)\.php(?:[^\[\]\s]*)/,
|
||||
/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/misc.php\?mod=(?<mod>stat|ranklist)(?:[^\[\]\s]*)/
|
||||
]
|
||||
|
||||
end
|
||||
|
||||
def execute
|
||||
get_knowledge_about_duplicated_email
|
||||
import_users
|
||||
import_categories
|
||||
import_posts
|
||||
|
@ -53,19 +69,53 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
def get_knowledge_about_group
|
||||
group_table = table_name 'common_usergroup'
|
||||
result = mysql_query(
|
||||
"SELECT groupid group_id, radminid role_id, type, grouptitle title
|
||||
"SELECT groupid group_id, radminid role_id
|
||||
FROM #{group_table};")
|
||||
@moderator_group_id = -1
|
||||
@admin_group_id = -1
|
||||
@moderator_group_id = []
|
||||
@admin_group_id = []
|
||||
#@banned_group_id = [4,5] # 禁止的用户及其帖子均不导入,如果你想导入这些用户和帖子,请把这个数组清空。
|
||||
|
||||
result.each do |group|
|
||||
role_id = group['role_id']
|
||||
group_id = group['group_id']
|
||||
case group['title'].strip
|
||||
when '管理员'
|
||||
@admin_admin_id = role_id
|
||||
when '超级版主'
|
||||
@moderator_admin_id = role_id
|
||||
case group['role_id']
|
||||
when 1 # 管理员
|
||||
@admin_group_id << group['group_id']
|
||||
when 2, 3 # 超级版主、版主。如果你不希望原普通版主成为Discourse版主,把3去掉。
|
||||
@moderator_group_id << group['group_id']
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def get_knowledge_about_category_slug
|
||||
@category_slug = {}
|
||||
results = mysql_query("SELECT svalue value
|
||||
FROM #{table_name 'common_setting'}
|
||||
WHERE skey = 'forumkeys'")
|
||||
|
||||
return if results.size < 1
|
||||
value = results.first['value']
|
||||
|
||||
return if value.blank?
|
||||
|
||||
PHP.unserialize(value).each do |category_import_id, slug|
|
||||
next if slug.blank?
|
||||
@category_slug[category_import_id] = slug
|
||||
end
|
||||
end
|
||||
|
||||
def get_knowledge_about_duplicated_email
|
||||
@duplicated_email = {}
|
||||
results = mysql_query(
|
||||
"select a.uid uid, b.uid import_id from pre_common_member a
|
||||
join (select uid, email from pre_common_member group by email having count(email) > 1 order by uid asc) b USING(email)
|
||||
where a.uid != b.uid")
|
||||
|
||||
users = @lookup.instance_variable_get :@users
|
||||
|
||||
results.each do |row|
|
||||
@duplicated_email[row['uid']] = row['import_id']
|
||||
user_id = users[row['import_id']]
|
||||
if user_id
|
||||
users[row['uid']] = user_id
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -79,51 +129,63 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
user_table = table_name 'common_member'
|
||||
profile_table = table_name 'common_member_profile'
|
||||
status_table = table_name 'common_member_status'
|
||||
forum_table = table_name 'common_member_field_forum'
|
||||
home_table = table_name 'common_member_field_home'
|
||||
total_count = mysql_query("SELECT count(*) count FROM #{user_table};").first['count']
|
||||
|
||||
batches(BATCH_SIZE) do |offset|
|
||||
results = mysql_query(
|
||||
"SELECT u.uid id, u.username username, u.email email, u.adminid admin_id, su.regdate regdate, s.regip regip,
|
||||
u.emailstatus email_confirmed, u.avatarstatus avatar_exists, p.site website, p.resideprovince province,
|
||||
p.residecity city, p.residedist country, p.residecommunity community, p.residesuite apartment,
|
||||
p.bio bio, s.lastip last_visit_ip, s.lastvisit last_visit_time, s.lastpost last_posted_at,
|
||||
s.lastsendmail last_emailed_at
|
||||
"SELECT u.uid id, u.username username, u.email email, u.groupid group_id,
|
||||
su.regdate regdate, su.password password_hash, su.salt salt,
|
||||
s.regip regip, s.lastip last_visit_ip, s.lastvisit last_visit_time, s.lastpost last_posted_at, s.lastsendmail last_emailed_at,
|
||||
u.emailstatus email_confirmed, u.avatarstatus avatar_exists,
|
||||
p.site website, p.address address, p.bio bio, p.realname realname, p.qq qq,
|
||||
p.resideprovince resideprovince, p.residecity residecity, p.residedist residedist, p.residecommunity residecommunity,
|
||||
p.resideprovince birthprovince, p.birthcity birthcity, p.birthdist birthdist, p.birthcommunity birthcommunity,
|
||||
h.spacecss spacecss, h.spacenote spacenote,
|
||||
f.customstatus customstatus, f.sightml sightml
|
||||
FROM #{user_table} u
|
||||
JOIN #{sensitive_user_table} su ON su.uid = u.uid
|
||||
JOIN #{profile_table} p ON p.uid = u.uid
|
||||
JOIN #{status_table} s ON s.uid = u.uid
|
||||
LEFT JOIN #{sensitive_user_table} su USING(uid)
|
||||
LEFT JOIN #{profile_table} p USING(uid)
|
||||
LEFT JOIN #{status_table} s USING(uid)
|
||||
LEFT JOIN #{forum_table} f USING(uid)
|
||||
LEFT JOIN #{home_table} h USING(uid)
|
||||
ORDER BY u.uid ASC
|
||||
LIMIT #{BATCH_SIZE}
|
||||
OFFSET #{offset};")
|
||||
|
||||
break if results.size < 1
|
||||
|
||||
next if all_records_exist? :users, users.map {|u| u["id"].to_i}
|
||||
# TODO: breaks the scipt reported by some users
|
||||
# next if all_records_exist? :users, users.map {|u| u["id"].to_i}
|
||||
|
||||
create_users(results, total: total_count, offset: offset) do |user|
|
||||
{ id: user['id'],
|
||||
email: user['email'],
|
||||
username: user['username'],
|
||||
name: user['username'],
|
||||
created_at: Time.zone.at(user['regdate']),
|
||||
name: first_exists(user['realname'], user['customstatus'], user['username']),
|
||||
import_pass: user['password_hash'],
|
||||
active: true,
|
||||
salt: user['salt'],
|
||||
# TODO: title: user['customstatus'], # move custom title to name since discourse can't let user custom title https://meta.discourse.org/t/let-users-custom-their-title/37626
|
||||
created_at: user['regdate'] ? Time.zone.at(user['regdate']) : nil,
|
||||
registration_ip_address: user['regip'],
|
||||
ip_address: user['last_visit_ip'],
|
||||
last_seen_at: user['last_visit_time'],
|
||||
last_emailed_at: user['last_emailed_at'],
|
||||
last_posted_at: user['last_posted_at'],
|
||||
moderator: user['admin_id'] == @moderator_admin_id,
|
||||
admin: user['admin_id'] == @admin_admin_id,
|
||||
active: true,
|
||||
website: user['website'],
|
||||
bio_raw: user['bio'],
|
||||
location: "#{user['province']}#{user['city']}#{user['country']}#{user['community']}#{user['apartment']}",
|
||||
moderator: @moderator_group_id.include?(user['group_id']),
|
||||
admin: @admin_group_id.include?(user['group_id']),
|
||||
website: (user['website'] and user['website'].include?('.')) ? user['website'].strip : ( user['qq'] and user['qq'].strip == user['qq'].strip.to_i and user['qq'].strip.to_i > 10000 ) ? 'http://user.qzone.qq.com/' + user['qq'].strip : nil,
|
||||
bio_raw: first_exists((user['bio'] and CGI.unescapeHTML(user['bio'])), user['sightml'], user['spacenote']).strip[0,3000],
|
||||
location: first_exists(user['address'], (!user['resideprovince'].blank? ? [user['resideprovince'], user['residecity'], user['residedist'], user['residecommunity']] : [user['birthprovince'], user['birthcity'], user['birthdist'], user['birthcommunity']]).reject{|location|location.blank?}.join(' ')),
|
||||
post_create_action: lambda do |newmember|
|
||||
if user['avatar_exists'] == 1 and newmember.uploaded_avatar_id.blank?
|
||||
path, filename = discuzx_avatar_fullpath(user['id'])
|
||||
if path
|
||||
begin
|
||||
upload = create_upload(newmember.id, path, filename)
|
||||
if upload.persisted?
|
||||
if !upload.nil? && upload.persisted?
|
||||
newmember.import_mode = false
|
||||
newmember.create_user_avatar
|
||||
newmember.import_mode = true
|
||||
|
@ -137,9 +199,42 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
end
|
||||
end
|
||||
end
|
||||
if !user['spacecss'].blank? and newmember.user_profile.profile_background.blank?
|
||||
# profile background
|
||||
if matched = user['spacecss'].match(/body\s*{[^}]*url\('?(.+?)'?\)/i)
|
||||
body_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last
|
||||
end
|
||||
if matched = user['spacecss'].match(/#hd\s*{[^}]*url\('?(.+?)'?\)/i)
|
||||
header_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last
|
||||
end
|
||||
if matched = user['spacecss'].match(/.blocktitle\s*{[^}]*url\('?(.+?)'?\)/i)
|
||||
blocktitle_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last
|
||||
end
|
||||
if matched = user['spacecss'].match(/#ct\s*{[^}]*url\('?(.+?)'?\)/i)
|
||||
content_background = matched[1].split(ORIGINAL_SITE_PREFIX, 2).last
|
||||
end
|
||||
|
||||
if body_background || header_background || blocktitle_background || content_background
|
||||
profile_background = first_exists(header_background, body_background, content_background, blocktitle_background)
|
||||
card_background = first_exists(content_background, body_background, header_background, blocktitle_background)
|
||||
upload = create_upload(newmember.id, File.join(DISCUZX_BASE_DIR, profile_background), File.basename(profile_background))
|
||||
if upload
|
||||
newmember.user_profile.upload_profile_background upload
|
||||
else
|
||||
puts "WARNING: #{user['username']} (UID: #{user['id']}) profile_background file did not persist!"
|
||||
end
|
||||
upload = create_upload(newmember.id, File.join(DISCUZX_BASE_DIR, card_background), File.basename(card_background))
|
||||
if upload
|
||||
newmember.user_profile.upload_card_background upload
|
||||
else
|
||||
puts "WARNING: #{user['username']} (UID: #{user['id']}) card_background file did not persist!"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# we don't send email to the unconfirmed user
|
||||
newmember.update(email_digests: user['email_confirmed'] == 1) if newmember.email_digests
|
||||
newmember.update(name: '') if !newmember.name.blank? and newmember.name == newmember.username
|
||||
end
|
||||
}
|
||||
end
|
||||
|
@ -149,27 +244,57 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
def import_categories
|
||||
puts '', "creating categories"
|
||||
|
||||
get_knowledge_about_category_slug
|
||||
|
||||
forums_table = table_name 'forum_forum'
|
||||
forums_data_table = table_name 'forum_forumfield'
|
||||
|
||||
results = mysql_query("
|
||||
SELECT f.fid id, f.fup parent_id, f.name, f.type type, f.status status, f.displayorder position,
|
||||
d.description description
|
||||
d.description description, d.rules rules, d.icon, d.extra extra
|
||||
FROM #{forums_table} f
|
||||
JOIN #{forums_data_table} d ON f.fid = d.fid
|
||||
LEFT JOIN #{forums_data_table} d USING(fid)
|
||||
ORDER BY parent_id ASC, id ASC
|
||||
")
|
||||
|
||||
max_position = Category.all.max_by(&:position).position
|
||||
create_categories(results) do |row|
|
||||
next if row['type'] == 'group' || row['status'].to_i == 3
|
||||
next if row['type'] == 'group' or row['status'] == 2 # or row['status'].to_i == 3 # 如果不想导入群组,取消注释
|
||||
extra = PHP.unserialize(row['extra']) if !row['extra'].blank?
|
||||
if extra and !extra["namecolor"].blank?
|
||||
color = extra["namecolor"][1,6]
|
||||
end
|
||||
|
||||
Category.all.max_by(&:position).position
|
||||
|
||||
h = {
|
||||
id: row['id'],
|
||||
name: row['name'],
|
||||
description: row['description'],
|
||||
position: row['position'].to_i + max_position
|
||||
position: row['position'].to_i + max_position,
|
||||
color: color,
|
||||
suppress_from_homepage: (row['status'] == 0 or row['status'] == 3),
|
||||
post_create_action: lambda do |category|
|
||||
if slug = @category_slug[row['id']]
|
||||
category.update(slug: slug)
|
||||
end
|
||||
|
||||
raw = process_discuzx_post(row['rules'], nil)
|
||||
if @bbcode_to_md
|
||||
raw = raw.bbcode_to_md(false) rescue raw
|
||||
end
|
||||
category.topic.posts.first.update_attribute(:raw, raw)
|
||||
if !row['icon'].empty?
|
||||
upload = create_upload(Discourse::SYSTEM_USER_ID, File.join(DISCUZX_BASE_DIR, ATTACHMENT_DIR, '../common', row['icon']), File.basename(row['icon']))
|
||||
if upload
|
||||
category.logo_url = upload.url
|
||||
# FIXME: I don't know how to get '/shared' by script. May change to Rails.root
|
||||
category.color = Miro::DominantColors.new(File.join('/shared', category.logo_url)).to_hex.first[1,6] if !color
|
||||
category.save!
|
||||
end
|
||||
end
|
||||
category
|
||||
end
|
||||
}
|
||||
if row['parent_id'].to_i > 0
|
||||
h[:parent_category_id] = category_id_from_imported_category_id(row['parent_id'])
|
||||
|
@ -181,6 +306,7 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
def import_posts
|
||||
puts "", "creating topics and posts"
|
||||
|
||||
users_table = table_name 'common_member'
|
||||
posts_table = table_name 'forum_post'
|
||||
topics_table = table_name 'forum_thread'
|
||||
|
||||
|
@ -195,16 +321,18 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
p.authorid user_id,
|
||||
p.message raw,
|
||||
p.dateline post_time,
|
||||
p.first is_first_post,
|
||||
p.invisible status
|
||||
FROM #{posts_table} p,
|
||||
#{topics_table} t
|
||||
WHERE p.tid = t.tid
|
||||
p2.pid first_id,
|
||||
p.invisible status,
|
||||
t.special special
|
||||
FROM #{posts_table} p
|
||||
JOIN #{posts_table} p2 ON p2.first AND p2.tid = p.tid
|
||||
JOIN #{topics_table} t ON t.tid = p.tid
|
||||
where t.tid < 10000
|
||||
ORDER BY id ASC, topic_id ASC
|
||||
LIMIT #{BATCH_SIZE}
|
||||
OFFSET #{offset};
|
||||
")
|
||||
|
||||
# u.status != -1 AND u.groupid != 4 AND u.groupid != 5 用户未被锁定、禁访或禁言。在现实中的 Discuz 论坛,禁止的用户通常是广告机或驱逐的用户,这些不需要导入。
|
||||
break if results.size < 1
|
||||
|
||||
next if all_records_exist? :posts, results.map {|p| p["id"].to_i}
|
||||
|
@ -218,40 +346,95 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
mapped[:raw] = process_discuzx_post(m['raw'], m['id'])
|
||||
mapped[:created_at] = Time.zone.at(m['post_time'])
|
||||
|
||||
if m['is_first_post'] == 1
|
||||
if m['id'] == m['first_id']
|
||||
mapped[:category] = category_id_from_imported_category_id(m['category_id'])
|
||||
mapped[:title] = CGI.unescapeHTML(m['title'])
|
||||
@first_post_id_by_topic_id[m['topic_id']] = m['id']
|
||||
|
||||
if m['special'] == 1
|
||||
results = mysql_query("
|
||||
SELECT multiple, maxchoices
|
||||
FROM #{table_name 'forum_poll'}
|
||||
WHERE tid = #{m['topic_id']}")
|
||||
poll = results.first || {}
|
||||
results = mysql_query("
|
||||
SELECT polloption
|
||||
FROM #{table_name 'forum_polloption'}
|
||||
WHERE tid = #{m['topic_id']}
|
||||
ORDER BY displayorder")
|
||||
if results.empty?
|
||||
puts "WARNING: can't find poll options for topic #{m['topic_id']}, skip poll"
|
||||
else
|
||||
mapped[:raw].prepend "[poll#{poll['multiple'] ? ' type=multiple' : ''}#{poll['maxchoices'] > 0 ? " max=#{poll['maxchoices']}" : ''}]\n#{results.map{|option|'- ' + option['polloption']}.join("\n")}\n[/poll]\n"
|
||||
end
|
||||
end
|
||||
else
|
||||
parent = topic_lookup_from_imported_post_id(@first_post_id_by_topic_id[m['topic_id']])
|
||||
parent = topic_lookup_from_imported_post_id(m['first_id'])
|
||||
|
||||
if parent
|
||||
mapped[:topic_id] = parent[:topic_id]
|
||||
post_id = post_id_from_imported_post_id(find_post_id_by_quote_number(m['raw']).to_i)
|
||||
if (post = Post.find_by(id: post_id))
|
||||
mapped[:reply_to_post_number] = post.post_number
|
||||
reply_post_import_id = find_post_id_by_quote_number(m['raw'])
|
||||
if reply_post_import_id
|
||||
post_id = post_id_from_imported_post_id(reply_post_import_id.to_i)
|
||||
if (post = Post.find_by(id: post_id))
|
||||
if post.topic_id == mapped[:topic_id]
|
||||
mapped[:reply_to_post_number] = post.post_number
|
||||
else
|
||||
puts "post #{m['id']} reply to another topic, skip reply"
|
||||
end
|
||||
else
|
||||
puts "post #{m['id']} reply to not exists post #{reply_post_import_id}, skip reply"
|
||||
end
|
||||
end
|
||||
else
|
||||
puts "Parent topic #{m['topic_id']} doesn't exist. Skipping #{m['id']}: #{m['title'][0..40]}"
|
||||
skip = true
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
if [-5, -3, -1].include? m['status'] || mapped[:raw].blank?
|
||||
if m['status'] & 1 == 1 || mapped[:raw].blank?
|
||||
mapped[:post_create_action] = lambda do |post|
|
||||
PostDestroyer.new(Discourse.system_user, post).perform_delete
|
||||
end
|
||||
elsif m['status'] == -2# waiting for approve
|
||||
elsif (m['status'] & 2) >> 1 == 1 # waiting for approve
|
||||
mapped[:post_create_action] = lambda do |post|
|
||||
PostAction.act(Discourse.system_user, post, 6, {take_action: false})
|
||||
end
|
||||
end
|
||||
|
||||
skip ? nil : mapped
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def import_bookmarks
|
||||
puts '', 'creating bookmarks'
|
||||
favorites_table = table_name 'home_favorite'
|
||||
posts_table = table_name 'forum_post'
|
||||
|
||||
total_count = mysql_query("SELECT count(*) count FROM #{favorites_table} WHERE idtype = 'tid'").first['count']
|
||||
batches(BATCH_SIZE) do |offset|
|
||||
results = mysql_query("
|
||||
SELECT p.pid post_id, f.uid user_id
|
||||
FROM #{favorites_table} f
|
||||
JOIN #{posts_table} p ON f.id = p.tid
|
||||
WHERE f.idtype = 'tid' AND p.first
|
||||
LIMIT #{BATCH_SIZE}
|
||||
OFFSET #{offset};")
|
||||
|
||||
break if results.size < 1
|
||||
|
||||
# next if all_records_exist?
|
||||
|
||||
create_bookmarks(results, total: total_count, offset: offset) do |row|
|
||||
{
|
||||
user_id: row['user_id'],
|
||||
post_id: row['post_id']
|
||||
}
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
def import_private_messages
|
||||
puts '', 'creating private messages'
|
||||
|
||||
|
@ -285,7 +468,7 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
|
||||
break if results.size < 1
|
||||
|
||||
next if all_records_exist? :posts, results.map {|m| "pm:#{m['id']}"}
|
||||
# next if all_records_exist? :posts, results.map {|m| "pm:#{m['id']}"}
|
||||
|
||||
create_posts(results, total: total_count, offset: offset) do |m|
|
||||
skip = false
|
||||
|
@ -349,8 +532,9 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
result.first['id'].to_s == pm_id.to_s
|
||||
end
|
||||
|
||||
def process_discuzx_post(raw, import_id)
|
||||
def process_and_upload_inline_images(raw)
|
||||
inline_image_regex = /\[img\]([\s\S]*?)\[\/img\]/
|
||||
|
||||
s = raw.dup
|
||||
|
||||
s.gsub!(inline_image_regex) do |d|
|
||||
|
@ -361,14 +545,65 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
upload ? html_for_upload(upload, filename) : nil
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
def process_discuzx_post(raw, import_id)
|
||||
# raw = process_and_upload_inline_images(raw)
|
||||
s = raw.dup
|
||||
|
||||
# Strip the quote
|
||||
# [quote] quotation includes the topic which is the same as reply to in Discourse
|
||||
# We get the pid to find the post number the post reply to. So it can be stripped
|
||||
s = s.gsub(/\[quote\][\s\S]*?\[\/quote\]/i, '').strip
|
||||
s = s.gsub(/\[b\]回复 \[url=forum.php\?mod=redirect&goto=findpost&pid=\d+&ptid=\d+\].* 的帖子\[\/url\]\[\/b\]/i, '').strip
|
||||
s = s.gsub(/\[b\]回复 \[url=https?:\/\/#{ORIGINAL_SITE_PREFIX}\/redirect.php\?goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\].*?\[\/b\]/i, '').strip
|
||||
|
||||
# Convert image bbcode
|
||||
s.gsub!(/\[quote\](.*)?\[\/quote\]/im) do |matched|
|
||||
content = $1
|
||||
post_import_id = find_post_id_by_quote_number(content)
|
||||
if post_import_id
|
||||
post_id = post_id_from_imported_post_id(post_import_id.to_i)
|
||||
if (post = Post.find_by(id: post_id))
|
||||
"[quote=\"#{post.user.username}\", post: #{post.post_number}, topic: #{post.topic_id}]\n#{content}\n[/quote]"
|
||||
else
|
||||
puts "post #{import_id} quote to not exists post #{post_import_id}, skip reply"
|
||||
matched[0]
|
||||
end
|
||||
else
|
||||
matched[0]
|
||||
end
|
||||
end
|
||||
|
||||
s.gsub!(/\[size=2\]\[color=#999999\].*? 发表于 [\d\-\: ]*\[\/color\] \[url=forum.php\?mod=redirect&goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\]\[\/size\]/i, '')
|
||||
s.gsub!(/\[size=2\]\[color=#999999\].*? 发表于 [\d\-\: ]*\[\/color\] \[url=https?:\/\/#{ORIGINAL_SITE_PREFIX}\/redirect.php\?goto=findpost&pid=\d+&ptid=\d+\].*?\[\/url\]\[\/size\]/i, '')
|
||||
|
||||
# convert quote
|
||||
s.gsub!(/\[quote\](.*?)\[\/quote\]/m) { "\n" + ($1.strip).gsub(/^/, '> ') + "\n" }
|
||||
|
||||
# truncate line space, preventing line starting with many blanks to be parsed as code blocks
|
||||
s.gsub!(/^ {4,}/, ' ')
|
||||
|
||||
# TODO: Much better to use bbcode-to-md gem
|
||||
# Convert image bbcode with width and height
|
||||
s.gsub!(/\[img[^\]]*\]https?:\/\/#{ORIGINAL_SITE_PREFIX}\/(.*)\[\/img\]/i, '[x-attach]\1[/x-attach]') # dont convert attachment
|
||||
s.gsub!(/<img[^>]*src="https?:\/\/#{ORIGINAL_SITE_PREFIX}\/(.*)".*?>/i, '[x-attach]\1[/x-attach]') # dont convert attachment
|
||||
s.gsub!(/\[img[^\]]*\]https?:\/\/www\.touhou\.cc\/blog\/(.*)\[\/img\]/i, '[x-attach]../blog/\1[/x-attach]') # 私货
|
||||
s.gsub!(/\[img[^\]]*\]https?:\/\/www\.touhou\.cc\/ucenter\/avatar.php\?uid=(\d+)[^\]]*\[\/img\]/i) { "[x-attach]#{discuzx_avatar_fullpath($1,false)[0]}[/x-attach]" } # 私货
|
||||
s.gsub!(/\[img=(\d+),(\d+)\]([^\]]*)\[\/img\]/i, '<img width="\1" height="\2" src="\3">')
|
||||
s.gsub!(/\[img\]([^\]]*)\[\/img\]/i, '<img src="\1">')
|
||||
|
||||
s.gsub!(/\[qq\]([^\]]*)\[\/qq\]/i, '<a href="http://wpa.qq.com/msgrd?V=3&Uin=\1&Site=[Discuz!]&from=discuz&Menu=yes" target="_blank"><!--<img src="static/image/common/qq_big.gif" border="0">-->QQ 交谈</a>')
|
||||
|
||||
s.gsub!(/\[email\]([^\]]*)\[\/email\]/i, '[url=mailto:\1]\1[/url]') # bbcode-to-md can convert it
|
||||
s.gsub!(/\[s\]([^\]]*)\[\/s\]/i, '<s>\1</s>')
|
||||
s.gsub!(/\[sup\]([^\]]*)\[\/sup\]/i, '<sup>\1</sup>')
|
||||
s.gsub!(/\[sub\]([^\]]*)\[\/sub\]/i, '<sub>\1</sub>')
|
||||
s.gsub!(/\[hr\]/i, "\n---\n")
|
||||
|
||||
# remove the media tag
|
||||
s.gsub!(/\[\/?media[^\]]*\]/i, "\n")
|
||||
s.gsub!(/\[\/?flash[^\]]*\]/i, "\n")
|
||||
s.gsub!(/\[\/?audio[^\]]*\]/i, "\n")
|
||||
s.gsub!(/\[\/?video[^\]]*\]/i, "\n")
|
||||
|
||||
# Remove the font, p and backcolor tag
|
||||
# Discourse doesn't support the font tag
|
||||
|
@ -390,11 +625,14 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
|
||||
# Remove the hide tag
|
||||
s.gsub!(/\[\/?hide\]/i, '')
|
||||
s.gsub!(/\[\/?free[^\]]*\]/i, "\n")
|
||||
|
||||
# Remove the align tag
|
||||
# still don't know what it is
|
||||
s.gsub!(/\[align=[^\]]*?\]/i, '')
|
||||
s.gsub!(/\[align=[^\]]*?\]/i, "\n")
|
||||
s.gsub!(/\[\/align\]/i, "\n")
|
||||
s.gsub!(/\[float=[^\]]*?\]/i, "\n")
|
||||
s.gsub!(/\[\/float\]/i, "\n")
|
||||
|
||||
# Convert code
|
||||
s.gsub!(/\[\/?code\]/i, "\n```\n")
|
||||
|
@ -424,39 +662,65 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
# [url][b]text[/b][/url] to **[url]text[/url]**
|
||||
s.gsub!(/(\[url=[^\[\]]*?\])\[b\](\S*)\[\/b\](\[\/url\])/, '**\1\2\3**')
|
||||
|
||||
s.gsub!(internal_url_regexp) do |discuzx_link|
|
||||
replace_internal_link(discuzx_link, $1)
|
||||
@internal_url_regexps.each do |internal_url_regexp|
|
||||
s.gsub!(internal_url_regexp) do |discuzx_link|
|
||||
replace_internal_link(discuzx_link, ($~[:tid].to_i rescue nil), ($~[:pid].to_i rescue nil), ($~[:fid].to_i rescue nil), ($~[:action] rescue nil))
|
||||
end
|
||||
end
|
||||
|
||||
# @someone without the url
|
||||
s.gsub!(/@\[url=[^\[\]]*?\](\S*)\[\/url\]/i, '@\1')
|
||||
|
||||
s.scan(/http(?:s)?:\/\/#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}\/[^\[\]\s]*/) {|link|puts "WARNING: post #{import_id} can't replace internal url #{link}"}
|
||||
|
||||
s.strip
|
||||
end
|
||||
|
||||
def replace_internal_link(discuzx_link, import_topic_id)
|
||||
results = mysql_query("SELECT pid
|
||||
FROM #{table_name 'forum_post'}
|
||||
WHERE tid = #{import_topic_id}
|
||||
ORDER BY pid ASC
|
||||
LIMIT 1")
|
||||
|
||||
return discuzx_link unless results.size > 0
|
||||
|
||||
linked_topic_id = results.first['pid']
|
||||
lookup = topic_lookup_from_imported_post_id(linked_topic_id)
|
||||
|
||||
return discuzx_link unless lookup
|
||||
|
||||
if (t = Topic.find_by(id: lookup[:topic_id]))
|
||||
"#{NEW_SITE_PREFIX}/t/#{t.slug}/#{t.id}"
|
||||
else
|
||||
discuzx_link
|
||||
def replace_internal_link(discuzx_link, import_topic_id, import_post_id, import_category_id, action)
|
||||
if import_post_id
|
||||
post_id = post_id_from_imported_post_id import_post_id
|
||||
if post_id
|
||||
post = Post.find post_id
|
||||
return post.full_url if post
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def internal_url_regexp
|
||||
@internal_url_regexp ||= Regexp.new("http(?:s)?://#{ORIGINAL_SITE_PREFIX.gsub('.', '\.')}/forum\\.php\\?mod=viewthread&tid=(\\d+)(?:[^\\]\\[]*)")
|
||||
if import_topic_id
|
||||
|
||||
results = mysql_query("SELECT pid
|
||||
FROM #{table_name 'forum_post'}
|
||||
WHERE tid = #{import_topic_id} AND first
|
||||
LIMIT 1")
|
||||
|
||||
return discuzx_link unless results.size > 0
|
||||
|
||||
linked_post_id = results.first['pid']
|
||||
lookup = topic_lookup_from_imported_post_id(linked_post_id)
|
||||
|
||||
if lookup
|
||||
return "#{NEW_SITE_PREFIX}#{lookup[:url]}"
|
||||
else
|
||||
return discuzx_link
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
if import_category_id
|
||||
category_id = category_id_from_imported_category_id import_category_id
|
||||
if category_id
|
||||
category = Category.find category_id
|
||||
return category.url if category
|
||||
end
|
||||
end
|
||||
|
||||
case action
|
||||
when 'index'
|
||||
return "#{NEW_SITE_PREFIX}/"
|
||||
when 'stat', 'stats', 'ranklist'
|
||||
return "#{NEW_SITE_PREFIX}/users"
|
||||
end
|
||||
|
||||
discuzx_link
|
||||
end
|
||||
|
||||
def pm_url_regexp
|
||||
|
@ -470,8 +734,7 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
SiteSetting.authorized_extensions = setting if setting != SiteSetting.authorized_extensions
|
||||
|
||||
attachment_regex = /\[attach\](\d+)\[\/attach\]/
|
||||
|
||||
user = Discourse.system_user
|
||||
attachment_link_regex = /\[x-attach\](.+)\[\/x-attach\]/
|
||||
|
||||
current_count = 0
|
||||
total_count = mysql_query("SELECT count(*) count FROM #{table_name 'forum_post'};").first['count']
|
||||
|
@ -482,13 +745,20 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
puts '', "Importing attachments...", ''
|
||||
|
||||
Post.find_each do |post|
|
||||
next unless post.custom_fields['import_id'] == post.custom_fields['import_id'].to_i.to_s
|
||||
|
||||
user = post.user
|
||||
|
||||
current_count += 1
|
||||
print_status current_count, total_count
|
||||
|
||||
new_raw = post.raw.dup
|
||||
|
||||
inline_attachments = []
|
||||
|
||||
new_raw.gsub!(attachment_regex) do |s|
|
||||
matches = attachment_regex.match(s)
|
||||
attachment_id = matches[1]
|
||||
attachment_id = $1.to_i
|
||||
inline_attachments.push attachment_id
|
||||
|
||||
upload, filename = find_upload(user, post, attachment_id)
|
||||
unless upload
|
||||
|
@ -498,6 +768,41 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
|
||||
html_for_upload(upload, filename)
|
||||
end
|
||||
new_raw.gsub!(attachment_link_regex) do |s|
|
||||
attachment_file = $1
|
||||
|
||||
filename = File.basename(attachment_file)
|
||||
upload = create_upload(user.id, File.join(DISCUZX_BASE_DIR, attachment_file), filename)
|
||||
unless upload
|
||||
fail_count += 1
|
||||
next
|
||||
end
|
||||
|
||||
html_for_upload(upload, filename)
|
||||
end
|
||||
|
||||
sql = "SELECT aid
|
||||
FROM #{table_name 'forum_attachment'}
|
||||
WHERE pid = #{post.custom_fields['import_id']}"
|
||||
if !inline_attachments.empty?
|
||||
sql << " AND aid NOT IN (#{inline_attachments.join(',')})"
|
||||
end
|
||||
|
||||
results = mysql_query(sql)
|
||||
|
||||
results.each do |attachment|
|
||||
attachment_id = attachment['aid']
|
||||
upload, filename = find_upload(user, post, attachment_id)
|
||||
unless upload
|
||||
fail_count += 1
|
||||
next
|
||||
end
|
||||
html = html_for_upload(upload, filename)
|
||||
unless new_raw.include? html
|
||||
new_raw << "\n"
|
||||
new_raw << html
|
||||
end
|
||||
end
|
||||
|
||||
if new_raw != post.raw
|
||||
PostRevisor.new(post).revise!(post.user, { raw: new_raw }, { bypass_bump: true, edit_reason: '从 Discuz 中导入附件' })
|
||||
|
@ -513,7 +818,7 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
end
|
||||
|
||||
# Create the full path to the discuz avatar specified from user id
|
||||
def discuzx_avatar_fullpath(user_id)
|
||||
def discuzx_avatar_fullpath(user_id, absolute=true)
|
||||
padded_id = user_id.to_s.rjust(9, '0')
|
||||
|
||||
part_1 = padded_id[0..2]
|
||||
|
@ -522,16 +827,23 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
part_4 = padded_id[-2..-1]
|
||||
file_name = "#{part_4}_avatar_big.jpg"
|
||||
|
||||
return File.join(DISCUZX_BASE_DIR, AVATAR_DIR, part_1, part_2, part_3, file_name), file_name
|
||||
if absolute
|
||||
return File.join(DISCUZX_BASE_DIR, AVATAR_DIR, part_1, part_2, part_3, file_name), file_name
|
||||
else
|
||||
return File.join(AVATAR_DIR, part_1, part_2, part_3, file_name), file_name
|
||||
end
|
||||
end
|
||||
|
||||
# post id is in the quote block
|
||||
def find_post_id_by_quote_number(raw)
|
||||
s = raw.dup
|
||||
quote_reply = s.match(/\[quote\][\S\s]*pid=(\d+)[\S\s]*\[\/quote\]/)
|
||||
reply = s.match(/url=forum.php\?mod=redirect&goto=findpost&pid=(\d+)&ptid=\d+/)
|
||||
|
||||
quote_reply ? quote_reply[1] : (reply ? reply[1] : nil)
|
||||
case raw
|
||||
when /\[url=forum.php\?mod=redirect&goto=findpost&pid=(\d+)&ptid=\d+\]/ #standard
|
||||
$1
|
||||
when /\[url=https?:\/\/#{ORIGINAL_SITE_PREFIX}\/redirect.php\?goto=findpost&pid=(\d+)&ptid=\d+\]/ # old discuz 7 format
|
||||
$1
|
||||
when /\[quote\][\S\s]*pid=(\d+)[\S\s]*\[\/quote\]/ # quote
|
||||
$1
|
||||
end
|
||||
end
|
||||
|
||||
# for some reason, discuz inlined some png file
|
||||
|
@ -632,6 +944,10 @@ class ImportScripts::DiscuzX < ImportScripts::Base
|
|||
return nil
|
||||
end
|
||||
|
||||
def first_exists(*items)
|
||||
items.find{|item|!item.blank?} || ''
|
||||
end
|
||||
|
||||
def mysql_query(sql)
|
||||
@client.query(sql, cache_rows: false)
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue
Block a user