2015-01-29 15:06:30 +08:00
# encoding: utf-8
#
# Author: Erick Guan <fantasticfears@gmail.com>
#
# This script import the data from latest Discuz! X
# Should work among Discuz! X3.x
# This script is tested only on Simplified Chinese Discuz! X instances
# If you want to import data other than Simplified Chinese, email me.
2016-04-01 05:33:25 +08:00
require 'php_serialize'
require 'miro'
2015-01-29 15:06:30 +08:00
require 'mysql2'
2015-03-19 03:30:42 +08:00
require File . expand_path ( File . dirname ( __FILE__ ) + " /base.rb " )
2015-01-29 15:06:30 +08:00
class ImportScripts :: DiscuzX < ImportScripts :: Base
DISCUZX_DB = " ultrax "
DB_TABLE_PREFIX = 'pre_'
BATCH_SIZE = 1000
ORIGINAL_SITE_PREFIX = " oldsite.example.com/forums " # without http(s)://
NEW_SITE_PREFIX = " http://discourse.example.com " # with http:// or https://
# Set DISCUZX_BASE_DIR to the base directory of your discuz installation.
DISCUZX_BASE_DIR = '/var/www/discuz/upload'
AVATAR_DIR = '/uc_server/data/avatar'
ATTACHMENT_DIR = '/data/attachment/forum'
AUTHORIZED_EXTENSIONS = [ 'jpg' , 'jpeg' , 'png' , 'gif' , 'zip' , 'rar' , 'pdf' ]
def initialize
super
@client = Mysql2 :: Client . new (
host : " localhost " ,
username : " root " ,
#password: "password",
database : DISCUZX_DB
)
@first_post_id_by_topic_id = { }
2016-04-01 05:33:25 +08:00
@internal_url_regexps = [
/ http(?:s)?: \/ \/ #{ ORIGINAL_SITE_PREFIX . gsub ( '.' , '\.' ) } \/ forum \ .php \ ?mod=viewthread(?:&|&)tid=(?<tid> \ d+)(?:[^ \ [ \ ] \ s]*)(?:pid=?(?<pid> \ d+))?(?:[^ \ [ \ ] \ s]*) / ,
/ http(?:s)?: \/ \/ #{ ORIGINAL_SITE_PREFIX . gsub ( '.' , '\.' ) } \/ viewthread \ .php \ ?tid=(?<tid> \ d+)(?:[^ \ [ \ ] \ s]*)(?:pid=?(?<pid> \ d+))?(?:[^ \ [ \ ] \ s]*) / ,
/ http(?:s)?: \/ \/ #{ ORIGINAL_SITE_PREFIX . gsub ( '.' , '\.' ) } \/ forum \ .php \ ?mod=redirect(?:&|&)goto=findpost(?:&|&)pid=(?<pid> \ d+)(?:&|&)ptid=(?<tid> \ d+)(?:[^ \ [ \ ] \ s]*) / ,
/ http(?:s)?: \/ \/ #{ ORIGINAL_SITE_PREFIX . gsub ( '.' , '\.' ) } \/ redirect \ .php \ ?goto=findpost(?:&|&)pid=(?<pid> \ d+)(?:&|&)ptid=(?<tid> \ d+)(?:[^ \ [ \ ] \ s]*) / ,
/ http(?:s)?: \/ \/ #{ ORIGINAL_SITE_PREFIX . gsub ( '.' , '\.' ) } \/ forumdisplay \ .php \ ?fid=(?<fid> \ d+)(?:[^ \ [ \ ] \ s]*) / ,
/ http(?:s)?: \/ \/ #{ ORIGINAL_SITE_PREFIX . gsub ( '.' , '\.' ) } \/ forum \ .php \ ?mod=forumdisplay(?:&|&)fid=(?<fid> \ d+)(?:[^ \ [ \ ] \ s]*) / ,
/ http(?:s)?: \/ \/ #{ ORIGINAL_SITE_PREFIX . gsub ( '.' , '\.' ) } \/ (?<action>index) \ .php(?:[^ \ [ \ ] \ s]*) / ,
/ http(?:s)?: \/ \/ #{ ORIGINAL_SITE_PREFIX . gsub ( '.' , '\.' ) } \/ (?<action>stats) \ .php(?:[^ \ [ \ ] \ s]*) / ,
/ http(?:s)?: \/ \/ #{ ORIGINAL_SITE_PREFIX . gsub ( '.' , '\.' ) } \/ misc.php \ ?mod=(?<mod>stat|ranklist)(?:[^ \ [ \ ] \ s]*) /
]
2015-01-29 15:06:30 +08:00
end
def execute
2016-04-01 05:33:25 +08:00
get_knowledge_about_duplicated_email
2015-01-29 15:06:30 +08:00
import_users
import_categories
import_posts
import_private_messages
import_attachments
end
# add the prefix to the table name
def table_name ( name = nil )
DB_TABLE_PREFIX + name
end
# find which group members can be granted as admin
def get_knowledge_about_group
group_table = table_name 'common_usergroup'
result = mysql_query (
2016-04-01 05:33:25 +08:00
" SELECT groupid group_id, radminid role_id
2015-01-29 15:06:30 +08:00
FROM #{group_table};")
2016-04-01 05:33:25 +08:00
@moderator_group_id = [ ]
@admin_group_id = [ ]
#@banned_group_id = [4,5] # 禁止的用户及其帖子均不导入,如果你想导入这些用户和帖子,请把这个数组清空。
2015-01-29 15:06:30 +08:00
result . each do | group |
2016-04-01 05:33:25 +08:00
case group [ 'role_id' ]
when 1 # 管理员
@admin_group_id << group [ 'group_id' ]
when 2 , 3 # 超级版主、版主。如果你不希望原普通版主成为Discourse版主, 把3去掉。
@moderator_group_id << group [ 'group_id' ]
end
end
end
def get_knowledge_about_category_slug
@category_slug = { }
results = mysql_query ( " SELECT svalue value
FROM #{table_name 'common_setting'}
WHERE skey = 'forumkeys' " )
return if results . size < 1
value = results . first [ 'value' ]
return if value . blank?
PHP . unserialize ( value ) . each do | category_import_id , slug |
next if slug . blank?
@category_slug [ category_import_id ] = slug
end
end
def get_knowledge_about_duplicated_email
@duplicated_email = { }
results = mysql_query (
" select a.uid uid, b.uid import_id from pre_common_member a
join ( select uid , email from pre_common_member group by email having count ( email ) > 1 order by uid asc ) b USING ( email )
where a . uid != b . uid " )
users = @lookup . instance_variable_get :@users
results . each do | row |
@duplicated_email [ row [ 'uid' ] ] = row [ 'import_id' ]
user_id = users [ row [ 'import_id' ] ]
if user_id
users [ row [ 'uid' ] ] = user_id
2015-01-29 15:06:30 +08:00
end
end
end
def import_users
puts '' , " creating users "
get_knowledge_about_group
sensitive_user_table = table_name 'ucenter_members'
user_table = table_name 'common_member'
profile_table = table_name 'common_member_profile'
status_table = table_name 'common_member_status'
2016-04-01 05:33:25 +08:00
forum_table = table_name 'common_member_field_forum'
home_table = table_name 'common_member_field_home'
2015-01-29 15:06:30 +08:00
total_count = mysql_query ( " SELECT count(*) count FROM #{ user_table } ; " ) . first [ 'count' ]
batches ( BATCH_SIZE ) do | offset |
results = mysql_query (
2016-04-01 05:33:25 +08:00
" SELECT u.uid id, u.username username, u.email email, u.groupid group_id,
su . regdate regdate , su . password password_hash , su . salt salt ,
s . regip regip , s . lastip last_visit_ip , s . lastvisit last_visit_time , s . lastpost last_posted_at , s . lastsendmail last_emailed_at ,
u . emailstatus email_confirmed , u . avatarstatus avatar_exists ,
p . site website , p . address address , p . bio bio , p . realname realname , p . qq qq ,
p . resideprovince resideprovince , p . residecity residecity , p . residedist residedist , p . residecommunity residecommunity ,
p . resideprovince birthprovince , p . birthcity birthcity , p . birthdist birthdist , p . birthcommunity birthcommunity ,
h . spacecss spacecss , h . spacenote spacenote ,
f . customstatus customstatus , f . sightml sightml
2015-01-29 15:06:30 +08:00
FROM #{user_table} u
2016-04-01 05:33:25 +08:00
LEFT JOIN #{sensitive_user_table} su USING(uid)
LEFT JOIN #{profile_table} p USING(uid)
LEFT JOIN #{status_table} s USING(uid)
LEFT JOIN #{forum_table} f USING(uid)
LEFT JOIN #{home_table} h USING(uid)
2015-01-29 15:06:30 +08:00
ORDER BY u . uid ASC
LIMIT #{BATCH_SIZE}
OFFSET #{offset};")
break if results . size < 1
2016-04-01 05:33:25 +08:00
# TODO: breaks the scipt reported by some users
# next if all_records_exist? :users, users.map {|u| u["id"].to_i}
2015-09-22 07:48:42 +08:00
2015-01-29 15:06:30 +08:00
create_users ( results , total : total_count , offset : offset ) do | user |
{ id : user [ 'id' ] ,
email : user [ 'email' ] ,
username : user [ 'username' ] ,
2016-04-01 05:33:25 +08:00
name : first_exists ( user [ 'realname' ] , user [ 'customstatus' ] , user [ 'username' ] ) ,
import_pass : user [ 'password_hash' ] ,
active : true ,
salt : user [ 'salt' ] ,
# TODO: title: user['customstatus'], # move custom title to name since discourse can't let user custom title https://meta.discourse.org/t/let-users-custom-their-title/37626
created_at : user [ 'regdate' ] ? Time . zone . at ( user [ 'regdate' ] ) : nil ,
2015-01-29 15:06:30 +08:00
registration_ip_address : user [ 'regip' ] ,
ip_address : user [ 'last_visit_ip' ] ,
last_seen_at : user [ 'last_visit_time' ] ,
last_emailed_at : user [ 'last_emailed_at' ] ,
last_posted_at : user [ 'last_posted_at' ] ,
2016-04-01 05:33:25 +08:00
moderator : @moderator_group_id . include? ( user [ 'group_id' ] ) ,
admin : @admin_group_id . include? ( user [ 'group_id' ] ) ,
website : ( user [ 'website' ] and user [ 'website' ] . include? ( '.' ) ) ? user [ 'website' ] . strip : ( user [ 'qq' ] and user [ 'qq' ] . strip == user [ 'qq' ] . strip . to_i and user [ 'qq' ] . strip . to_i > 10000 ) ? 'http://user.qzone.qq.com/' + user [ 'qq' ] . strip : nil ,
bio_raw : first_exists ( ( user [ 'bio' ] and CGI . unescapeHTML ( user [ 'bio' ] ) ) , user [ 'sightml' ] , user [ 'spacenote' ] ) . strip [ 0 , 3000 ] ,
location : first_exists ( user [ 'address' ] , ( ! user [ 'resideprovince' ] . blank? ? [ user [ 'resideprovince' ] , user [ 'residecity' ] , user [ 'residedist' ] , user [ 'residecommunity' ] ] : [ user [ 'birthprovince' ] , user [ 'birthcity' ] , user [ 'birthdist' ] , user [ 'birthcommunity' ] ] ) . reject { | location | location . blank? } . join ( ' ' ) ) ,
2015-01-29 15:06:30 +08:00
post_create_action : lambda do | newmember |
if user [ 'avatar_exists' ] == 1 and newmember . uploaded_avatar_id . blank?
path , filename = discuzx_avatar_fullpath ( user [ 'id' ] )
if path
begin
upload = create_upload ( newmember . id , path , filename )
2016-04-01 05:33:25 +08:00
if ! upload . nil? && upload . persisted?
2015-01-29 15:06:30 +08:00
newmember . import_mode = false
newmember . create_user_avatar
newmember . import_mode = true
newmember . user_avatar . update ( custom_upload_id : upload . id )
newmember . update ( uploaded_avatar_id : upload . id )
else
puts " Error: Upload did not persist! "
end
rescue SystemCallError = > err
puts " Could not import avatar: #{ err . message } "
end
end
end
2016-04-01 05:33:25 +08:00
if ! user [ 'spacecss' ] . blank? and newmember . user_profile . profile_background . blank?
# profile background
if matched = user [ 'spacecss' ] . match ( / body \ s*{[^}]*url \ ('?(.+?)'? \ ) /i )
body_background = matched [ 1 ] . split ( ORIGINAL_SITE_PREFIX , 2 ) . last
end
if matched = user [ 'spacecss' ] . match ( / # hd \ s*{[^}]*url \ ('?(.+?)'? \ ) /i )
header_background = matched [ 1 ] . split ( ORIGINAL_SITE_PREFIX , 2 ) . last
end
if matched = user [ 'spacecss' ] . match ( / .blocktitle \ s*{[^}]*url \ ('?(.+?)'? \ ) /i )
blocktitle_background = matched [ 1 ] . split ( ORIGINAL_SITE_PREFIX , 2 ) . last
end
if matched = user [ 'spacecss' ] . match ( / # ct \ s*{[^}]*url \ ('?(.+?)'? \ ) /i )
content_background = matched [ 1 ] . split ( ORIGINAL_SITE_PREFIX , 2 ) . last
end
if body_background || header_background || blocktitle_background || content_background
profile_background = first_exists ( header_background , body_background , content_background , blocktitle_background )
card_background = first_exists ( content_background , body_background , header_background , blocktitle_background )
upload = create_upload ( newmember . id , File . join ( DISCUZX_BASE_DIR , profile_background ) , File . basename ( profile_background ) )
if upload
newmember . user_profile . upload_profile_background upload
else
puts " WARNING: #{ user [ 'username' ] } (UID: #{ user [ 'id' ] } ) profile_background file did not persist! "
end
upload = create_upload ( newmember . id , File . join ( DISCUZX_BASE_DIR , card_background ) , File . basename ( card_background ) )
if upload
newmember . user_profile . upload_card_background upload
else
puts " WARNING: #{ user [ 'username' ] } (UID: #{ user [ 'id' ] } ) card_background file did not persist! "
end
end
end
2015-01-29 15:06:30 +08:00
# we don't send email to the unconfirmed user
newmember . update ( email_digests : user [ 'email_confirmed' ] == 1 ) if newmember . email_digests
2016-04-01 05:33:25 +08:00
newmember . update ( name : '' ) if ! newmember . name . blank? and newmember . name == newmember . username
2015-01-29 15:06:30 +08:00
end
}
end
end
end
def import_categories
puts '' , " creating categories "
2016-04-01 05:33:25 +08:00
get_knowledge_about_category_slug
2015-01-29 15:06:30 +08:00
forums_table = table_name 'forum_forum'
forums_data_table = table_name 'forum_forumfield'
results = mysql_query ( "
SELECT f . fid id , f . fup parent_id , f . name , f . type type , f . status status , f . displayorder position ,
2016-04-01 05:33:25 +08:00
d . description description , d . rules rules , d . icon , d . extra extra
2015-01-29 15:06:30 +08:00
FROM #{forums_table} f
2016-04-01 05:33:25 +08:00
LEFT JOIN #{forums_data_table} d USING(fid)
2015-01-29 15:06:30 +08:00
ORDER BY parent_id ASC , id ASC
" )
max_position = Category . all . max_by ( & :position ) . position
create_categories ( results ) do | row |
2016-04-01 05:33:25 +08:00
next if row [ 'type' ] == 'group' or row [ 'status' ] == 2 # or row['status'].to_i == 3 # 如果不想导入群组,取消注释
extra = PHP . unserialize ( row [ 'extra' ] ) if ! row [ 'extra' ] . blank?
if extra and ! extra [ " namecolor " ] . blank?
color = extra [ " namecolor " ] [ 1 , 6 ]
end
2015-01-29 15:06:30 +08:00
Category . all . max_by ( & :position ) . position
2016-04-01 05:33:25 +08:00
2015-01-29 15:06:30 +08:00
h = {
id : row [ 'id' ] ,
name : row [ 'name' ] ,
description : row [ 'description' ] ,
2016-04-01 05:33:25 +08:00
position : row [ 'position' ] . to_i + max_position ,
color : color ,
suppress_from_homepage : ( row [ 'status' ] == 0 or row [ 'status' ] == 3 ) ,
post_create_action : lambda do | category |
if slug = @category_slug [ row [ 'id' ] ]
category . update ( slug : slug )
end
raw = process_discuzx_post ( row [ 'rules' ] , nil )
if @bbcode_to_md
raw = raw . bbcode_to_md ( false ) rescue raw
end
category . topic . posts . first . update_attribute ( :raw , raw )
if ! row [ 'icon' ] . empty?
upload = create_upload ( Discourse :: SYSTEM_USER_ID , File . join ( DISCUZX_BASE_DIR , ATTACHMENT_DIR , '../common' , row [ 'icon' ] ) , File . basename ( row [ 'icon' ] ) )
if upload
category . logo_url = upload . url
# FIXME: I don't know how to get '/shared' by script. May change to Rails.root
category . color = Miro :: DominantColors . new ( File . join ( '/shared' , category . logo_url ) ) . to_hex . first [ 1 , 6 ] if ! color
category . save!
end
end
category
end
2015-01-29 15:06:30 +08:00
}
if row [ 'parent_id' ] . to_i > 0
2015-03-13 04:15:02 +08:00
h [ :parent_category_id ] = category_id_from_imported_category_id ( row [ 'parent_id' ] )
2015-01-29 15:06:30 +08:00
end
h
end
end
def import_posts
puts " " , " creating topics and posts "
2016-04-01 05:33:25 +08:00
users_table = table_name 'common_member'
2015-01-29 15:06:30 +08:00
posts_table = table_name 'forum_post'
topics_table = table_name 'forum_thread'
total_count = mysql_query ( " SELECT count(*) count FROM #{ posts_table } " ) . first [ 'count' ]
batches ( BATCH_SIZE ) do | offset |
results = mysql_query ( "
SELECT p . pid id ,
p . tid topic_id ,
t . fid category_id ,
t . subject title ,
p . authorid user_id ,
p . message raw ,
p . dateline post_time ,
2016-04-01 05:33:25 +08:00
p2 . pid first_id ,
p . invisible status ,
t . special special
FROM #{posts_table} p
JOIN #{posts_table} p2 ON p2.first AND p2.tid = p.tid
JOIN #{topics_table} t ON t.tid = p.tid
where t . tid < 10000
2015-01-29 15:06:30 +08:00
ORDER BY id ASC , topic_id ASC
LIMIT #{BATCH_SIZE}
OFFSET #{offset};
" )
2016-04-01 05:33:25 +08:00
# u.status != -1 AND u.groupid != 4 AND u.groupid != 5 用户未被锁定、禁访或禁言。在现实中的 Discuz 论坛,禁止的用户通常是广告机或驱逐的用户,这些不需要导入。
2015-01-29 15:06:30 +08:00
break if results . size < 1
2015-09-22 07:48:42 +08:00
next if all_records_exist? :posts , results . map { | p | p [ " id " ] . to_i }
2015-01-29 15:06:30 +08:00
create_posts ( results , total : total_count , offset : offset ) do | m |
skip = false
mapped = { }
mapped [ :id ] = m [ 'id' ]
mapped [ :user_id ] = user_id_from_imported_user_id ( m [ 'user_id' ] ) || - 1
mapped [ :raw ] = process_discuzx_post ( m [ 'raw' ] , m [ 'id' ] )
mapped [ :created_at ] = Time . zone . at ( m [ 'post_time' ] )
2016-04-01 05:33:25 +08:00
if m [ 'id' ] == m [ 'first_id' ]
2015-03-13 04:15:02 +08:00
mapped [ :category ] = category_id_from_imported_category_id ( m [ 'category_id' ] )
2015-01-29 15:06:30 +08:00
mapped [ :title ] = CGI . unescapeHTML ( m [ 'title' ] )
2016-04-01 05:33:25 +08:00
if m [ 'special' ] == 1
results = mysql_query ( "
SELECT multiple , maxchoices
FROM #{table_name 'forum_poll'}
WHERE tid = #{m['topic_id']}")
poll = results . first || { }
results = mysql_query ( "
SELECT polloption
FROM #{table_name 'forum_polloption'}
WHERE tid = #{m['topic_id']}
ORDER BY displayorder " )
if results . empty?
puts " WARNING: can't find poll options for topic #{ m [ 'topic_id' ] } , skip poll "
else
mapped [ :raw ] . prepend " [poll #{ poll [ 'multiple' ] ? ' type=multiple' : '' } #{ poll [ 'maxchoices' ] > 0 ? " max= #{ poll [ 'maxchoices' ] } " : '' } ] \n #{ results . map { | option | '- ' + option [ 'polloption' ] } . join ( " \n " ) } \n [/poll] \n "
end
end
2015-01-29 15:06:30 +08:00
else
2016-04-01 05:33:25 +08:00
parent = topic_lookup_from_imported_post_id ( m [ 'first_id' ] )
2015-01-29 15:06:30 +08:00
if parent
mapped [ :topic_id ] = parent [ :topic_id ]
2016-04-01 05:33:25 +08:00
reply_post_import_id = find_post_id_by_quote_number ( m [ 'raw' ] )
if reply_post_import_id
post_id = post_id_from_imported_post_id ( reply_post_import_id . to_i )
if ( post = Post . find_by ( id : post_id ) )
if post . topic_id == mapped [ :topic_id ]
mapped [ :reply_to_post_number ] = post . post_number
else
puts " post #{ m [ 'id' ] } reply to another topic, skip reply "
end
else
puts " post #{ m [ 'id' ] } reply to not exists post #{ reply_post_import_id } , skip reply "
end
2015-01-29 15:06:30 +08:00
end
else
puts " Parent topic #{ m [ 'topic_id' ] } doesn't exist. Skipping #{ m [ 'id' ] } : #{ m [ 'title' ] [ 0 .. 40 ] } "
skip = true
end
2016-04-01 05:33:25 +08:00
2015-01-29 15:06:30 +08:00
end
2016-04-01 05:33:25 +08:00
if m [ 'status' ] & 1 == 1 || mapped [ :raw ] . blank?
2015-01-29 15:06:30 +08:00
mapped [ :post_create_action ] = lambda do | post |
PostDestroyer . new ( Discourse . system_user , post ) . perform_delete
end
2016-04-01 05:33:25 +08:00
elsif ( m [ 'status' ] & 2 ) >> 1 == 1 # waiting for approve
2015-01-29 15:06:30 +08:00
mapped [ :post_create_action ] = lambda do | post |
PostAction . act ( Discourse . system_user , post , 6 , { take_action : false } )
end
end
skip ? nil : mapped
end
end
end
2016-04-01 05:33:25 +08:00
def import_bookmarks
puts '' , 'creating bookmarks'
favorites_table = table_name 'home_favorite'
posts_table = table_name 'forum_post'
total_count = mysql_query ( " SELECT count(*) count FROM #{ favorites_table } WHERE idtype = 'tid' " ) . first [ 'count' ]
batches ( BATCH_SIZE ) do | offset |
results = mysql_query ( "
SELECT p . pid post_id , f . uid user_id
FROM #{favorites_table} f
JOIN #{posts_table} p ON f.id = p.tid
WHERE f . idtype = 'tid' AND p . first
LIMIT #{BATCH_SIZE}
OFFSET #{offset};")
break if results . size < 1
# next if all_records_exist?
create_bookmarks ( results , total : total_count , offset : offset ) do | row |
{
user_id : row [ 'user_id' ] ,
post_id : row [ 'post_id' ]
}
end
end
end
2015-01-29 15:06:30 +08:00
def import_private_messages
puts '' , 'creating private messages'
pm_indexes = table_name 'ucenter_pm_indexes'
pm_messages = table_name 'ucenter_pm_messages'
total_count = mysql_query ( " SELECT count(*) count FROM #{ pm_indexes } " ) . first [ 'count' ]
batches ( BATCH_SIZE ) do | offset |
results = mysql_query ( "
SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_1
UNION SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_2
UNION SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_3
UNION SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_4
UNION SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_5
UNION SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_6
UNION SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_7
UNION SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_8
UNION SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_9
ORDER BY thread_id ASC , id ASC
LIMIT #{BATCH_SIZE}
OFFSET #{offset};")
break if results . size < 1
2016-04-01 05:33:25 +08:00
# next if all_records_exist? :posts, results.map {|m| "pm:#{m['id']}"}
2015-09-22 07:48:42 +08:00
2015-01-29 15:06:30 +08:00
create_posts ( results , total : total_count , offset : offset ) do | m |
skip = false
mapped = { }
mapped [ :id ] = " pm: #{ m [ 'id' ] } "
mapped [ :user_id ] = user_id_from_imported_user_id ( m [ 'user_id' ] ) || - 1
mapped [ :raw ] = process_discuzx_post ( m [ 'message' ] , m [ 'id' ] )
mapped [ :created_at ] = Time . zone . at ( m [ 'created_at' ] )
thread_id = " pm_ #{ m [ 'thread_id' ] } "
if is_first_pm ( m [ 'id' ] , m [ 'thread_id' ] )
# find the title from list table
pm_thread = mysql_query ( "
SELECT plid thread_id , subject
FROM #{table_name 'ucenter_pm_lists'}
WHERE plid = #{m['thread_id']};").first
mapped [ :title ] = pm_thread [ 'subject' ]
mapped [ :archetype ] = Archetype . private_message
# Find the users who are part of this private message.
import_user_ids = mysql_query ( "
SELECT plid thread_id , uid user_id
FROM #{table_name 'ucenter_pm_members'}
WHERE plid = #{m['thread_id']};
" ).map {|r| r['user_id']}.uniq
mapped [ :target_usernames ] = import_user_ids . map! do | import_user_id |
import_user_id . to_s == m [ 'user_id' ] . to_s ? nil : User . find_by ( id : user_id_from_imported_user_id ( import_user_id ) ) . try ( :username )
end . compact
if mapped [ :target_usernames ] . empty? # pm with yourself?
skip = true
puts " Skipping pm: #{ m [ 'id' ] } due to no target "
else
@first_post_id_by_topic_id [ thread_id ] = mapped [ :id ]
end
else
parent = topic_lookup_from_imported_post_id ( @first_post_id_by_topic_id [ thread_id ] )
if parent
mapped [ :topic_id ] = parent [ :topic_id ]
else
puts " Parent post pm thread: #{ thread_id } doesn't exist. Skipping #{ m [ " id " ] } : #{ m [ " message " ] [ 0 .. 40 ] } "
skip = true
end
end
skip ? nil : mapped
end
end
end
# search for first pm id for the series of pm
def is_first_pm ( pm_id , thread_id )
result = mysql_query ( "
SELECT pmid id
FROM #{table_name 'ucenter_pm_indexes'}
WHERE plid = #{thread_id}
ORDER BY id " )
result . first [ 'id' ] . to_s == pm_id . to_s
end
2016-04-01 05:33:25 +08:00
def process_and_upload_inline_images ( raw )
2015-01-29 15:06:30 +08:00
inline_image_regex = / \ [img \ ]([ \ s \ S]*?) \ [ \/ img \ ] /
2016-04-01 05:33:25 +08:00
2015-01-29 15:06:30 +08:00
s = raw . dup
s . gsub! ( inline_image_regex ) do | d |
matches = inline_image_regex . match ( d )
data = matches [ 1 ]
upload , filename = upload_inline_image data
upload ? html_for_upload ( upload , filename ) : nil
end
2016-04-01 05:33:25 +08:00
end
def process_discuzx_post ( raw , import_id )
# raw = process_and_upload_inline_images(raw)
s = raw . dup
2015-01-29 15:06:30 +08:00
# Strip the quote
# [quote] quotation includes the topic which is the same as reply to in Discourse
# We get the pid to find the post number the post reply to. So it can be stripped
2015-02-11 13:43:16 +08:00
s = s . gsub ( / \ [b \ ]回复 \ [url=forum.php \ ?mod=redirect&goto=findpost&pid= \ d+&ptid= \ d+ \ ].* 的帖子 \ [ \/ url \ ] \ [ \/ b \ ] /i , '' ) . strip
2016-04-01 05:33:25 +08:00
s = s . gsub ( / \ [b \ ]回复 \ [url=https?: \/ \/ #{ ORIGINAL_SITE_PREFIX } \/ redirect.php \ ?goto=findpost&pid= \ d+&ptid= \ d+ \ ].*? \ [ \/ url \ ].*? \ [ \/ b \ ] /i , '' ) . strip
s . gsub! ( / \ [quote \ ](.*)? \ [ \/ quote \ ] /im ) do | matched |
content = $1
post_import_id = find_post_id_by_quote_number ( content )
if post_import_id
post_id = post_id_from_imported_post_id ( post_import_id . to_i )
if ( post = Post . find_by ( id : post_id ) )
" [quote= \" #{ post . user . username } \" , post: #{ post . post_number } , topic: #{ post . topic_id } ] \n #{ content } \n [/quote] "
else
puts " post #{ import_id } quote to not exists post #{ post_import_id } , skip reply "
matched [ 0 ]
end
else
matched [ 0 ]
end
end
2015-02-11 13:43:16 +08:00
2016-04-01 05:33:25 +08:00
s . gsub! ( / \ [size=2 \ ] \ [color= # 999999 \ ].*? 发表于 [ \ d \ - \ : ]* \ [ \/ color \ ] \ [url=forum.php \ ?mod=redirect&goto=findpost&pid= \ d+&ptid= \ d+ \ ].*? \ [ \/ url \ ] \ [ \/ size \ ] /i , '' )
s . gsub! ( / \ [size=2 \ ] \ [color= # 999999 \ ].*? 发表于 [ \ d \ - \ : ]* \ [ \/ color \ ] \ [url=https?: \/ \/ #{ ORIGINAL_SITE_PREFIX } \/ redirect.php \ ?goto=findpost&pid= \ d+&ptid= \ d+ \ ].*? \ [ \/ url \ ] \ [ \/ size \ ] /i , '' )
# convert quote
s . gsub! ( / \ [quote \ ](.*?) \ [ \/ quote \ ] /m ) { " \n " + ( $1 . strip ) . gsub ( / ^ / , '> ' ) + " \n " }
# truncate line space, preventing line starting with many blanks to be parsed as code blocks
s . gsub! ( / ^ {4,} / , ' ' )
# TODO: Much better to use bbcode-to-md gem
# Convert image bbcode with width and height
s . gsub! ( / \ [img[^ \ ]]* \ ]https?: \/ \/ #{ ORIGINAL_SITE_PREFIX } \/ (.*) \ [ \/ img \ ] /i , '[x-attach]\1[/x-attach]' ) # dont convert attachment
s . gsub! ( / <img[^>]*src="https?: \/ \/ #{ ORIGINAL_SITE_PREFIX } \/ (.*)".*?> /i , '[x-attach]\1[/x-attach]' ) # dont convert attachment
s . gsub! ( / \ [img[^ \ ]]* \ ]https?: \/ \/ www \ .touhou \ .cc \/ blog \/ (.*) \ [ \/ img \ ] /i , '[x-attach]../blog/\1[/x-attach]' ) # 私货
s . gsub! ( / \ [img[^ \ ]]* \ ]https?: \/ \/ www \ .touhou \ .cc \/ ucenter \/ avatar.php \ ?uid=( \ d+)[^ \ ]]* \ [ \/ img \ ] /i ) { " [x-attach] #{ discuzx_avatar_fullpath ( $1 , false ) [ 0 ] } [/x-attach] " } # 私货
2015-02-11 13:43:16 +08:00
s . gsub! ( / \ [img=( \ d+),( \ d+) \ ]([^ \ ]]*) \ [ \/ img \ ] /i , '<img width="\1" height="\2" src="\3">' )
2016-04-01 05:33:25 +08:00
s . gsub! ( / \ [img \ ]([^ \ ]]*) \ [ \/ img \ ] /i , '<img src="\1">' )
s . gsub! ( / \ [qq \ ]([^ \ ]]*) \ [ \/ qq \ ] /i , '<a href="http://wpa.qq.com/msgrd?V=3&Uin=\1&Site=[Discuz!]&from=discuz&Menu=yes" target="_blank"><!--<img src="static/image/common/qq_big.gif" border="0">-->QQ 交谈</a>' )
s . gsub! ( / \ [email \ ]([^ \ ]]*) \ [ \/ email \ ] /i , '[url=mailto:\1]\1[/url]' ) # bbcode-to-md can convert it
s . gsub! ( / \ [s \ ]([^ \ ]]*) \ [ \/ s \ ] /i , '<s>\1</s>' )
s . gsub! ( / \ [sup \ ]([^ \ ]]*) \ [ \/ sup \ ] /i , '<sup>\1</sup>' )
s . gsub! ( / \ [sub \ ]([^ \ ]]*) \ [ \/ sub \ ] /i , '<sub>\1</sub>' )
s . gsub! ( / \ [hr \ ] /i , " \n --- \n " )
# remove the media tag
s . gsub! ( / \ [ \/ ?media[^ \ ]]* \ ] /i , " \n " )
s . gsub! ( / \ [ \/ ?flash[^ \ ]]* \ ] /i , " \n " )
s . gsub! ( / \ [ \/ ?audio[^ \ ]]* \ ] /i , " \n " )
s . gsub! ( / \ [ \/ ?video[^ \ ]]* \ ] /i , " \n " )
2015-01-29 15:06:30 +08:00
2015-03-24 17:59:08 +08:00
# Remove the font, p and backcolor tag
2015-01-29 15:06:30 +08:00
# Discourse doesn't support the font tag
2015-03-24 17:59:08 +08:00
s . gsub! ( / \ [font=[^ \ ]]*? \ ] /i , '' )
2015-01-29 15:06:30 +08:00
s . gsub! ( / \ [ \/ font \ ] /i , '' )
2015-03-24 17:59:08 +08:00
s . gsub! ( / \ [p=[^ \ ]]*? \ ] /i , '' )
s . gsub! ( / \ [ \/ p \ ] /i , '' )
s . gsub! ( / \ [backcolor=[^ \ ]]*? \ ] /i , '' )
s . gsub! ( / \ [ \/ backcolor \ ] /i , '' )
2015-01-29 15:06:30 +08:00
# Remove the size tag
# I really have no idea what is this
2015-03-24 17:59:08 +08:00
s . gsub! ( / \ [size=[^ \ ]]*? \ ] /i , '' )
2015-01-29 15:06:30 +08:00
s . gsub! ( / \ [ \/ size \ ] /i , '' )
# Remove the color tag
2015-03-24 17:59:08 +08:00
s . gsub! ( / \ [color=[^ \ ]]*? \ ] /i , '' )
2015-01-29 15:06:30 +08:00
s . gsub! ( / \ [ \/ color \ ] /i , '' )
2015-02-11 13:43:16 +08:00
# Remove the hide tag
s . gsub! ( / \ [ \/ ?hide \ ] /i , '' )
2016-04-01 05:33:25 +08:00
s . gsub! ( / \ [ \/ ?free[^ \ ]]* \ ] /i , " \n " )
2015-02-11 13:43:16 +08:00
2015-01-29 15:06:30 +08:00
# Remove the align tag
# still don't know what it is
2016-04-01 05:33:25 +08:00
s . gsub! ( / \ [align=[^ \ ]]*? \ ] /i , " \n " )
2015-01-29 15:06:30 +08:00
s . gsub! ( / \ [ \/ align \ ] /i , " \n " )
2016-04-01 05:33:25 +08:00
s . gsub! ( / \ [float=[^ \ ]]*? \ ] /i , " \n " )
s . gsub! ( / \ [ \/ float \ ] /i , " \n " )
2015-01-29 15:06:30 +08:00
# Convert code
2015-02-11 13:43:16 +08:00
s . gsub! ( / \ [ \/ ?code \ ] /i , " \n ``` \n " )
2015-01-29 15:06:30 +08:00
# The edit notice should be removed
# example: 本帖最后由 Helloworld 于 2015-1-28 22:05 编辑
s . gsub! ( / \ [i=s \ ] 本帖最后由[ \ s \ S]*?编辑 \ [ \/ i \ ] / , '' )
# Convert the custom smileys to emojis
# `{:cry:}` to `:cry`
s . gsub! ( / \ {( \ : \ S*? \ :) \ } / , '\1' )
# Replace internal forum links that aren't in the <!-- l --> format
# convert list tags to ul and list=1 tags to ol
# (basically, we're only missing list=a here...)
s . gsub! ( / \ [list \ ](.*?) \ [ \/ list:u \ ] /m , '[ul]\1[/ul]' )
s . gsub! ( / \ [list=1 \ ](.*?) \ [ \/ list:o \ ] /m , '[ol]\1[/ol]' )
# convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
s . gsub! ( / \ [ \ * \ ](.*?) \ [ \/ \ *:m \ ] / , '[li]\1[/li]' )
# Discuz can create PM out of a post, which will generates like
# [url=http://example.com/forum.php?mod=redirect&goto=findpost&pid=111&ptid=11][b]关于您在“主题名称”的帖子[/b][/url]
s . gsub! ( pm_url_regexp ) do | discuzx_link |
replace_internal_link ( discuzx_link , $1 )
end
# [url][b]text[/b][/url] to **[url]text[/url]**
s . gsub! ( / ( \ [url=[^ \ [ \ ]]*? \ ]) \ [b \ ]( \ S*) \ [ \/ b \ ]( \ [ \/ url \ ]) / , '**\1\2\3**' )
2016-04-01 05:33:25 +08:00
@internal_url_regexps . each do | internal_url_regexp |
s . gsub! ( internal_url_regexp ) do | discuzx_link |
replace_internal_link ( discuzx_link , ( $~ [ :tid ] . to_i rescue nil ) , ( $~ [ :pid ] . to_i rescue nil ) , ( $~ [ :fid ] . to_i rescue nil ) , ( $~ [ :action ] rescue nil ) )
end
2015-01-29 15:06:30 +08:00
end
# @someone without the url
s . gsub! ( / @ \ [url=[^ \ [ \ ]]*? \ ]( \ S*) \ [ \/ url \ ] /i , '@\1' )
2016-04-01 05:33:25 +08:00
s . scan ( / http(?:s)?: \/ \/ #{ ORIGINAL_SITE_PREFIX . gsub ( '.' , '\.' ) } \/ [^ \ [ \ ] \ s]* / ) { | link | puts " WARNING: post #{ import_id } can't replace internal url #{ link } " }
2015-01-29 15:06:30 +08:00
s . strip
end
2016-04-01 05:33:25 +08:00
def replace_internal_link ( discuzx_link , import_topic_id , import_post_id , import_category_id , action )
if import_post_id
post_id = post_id_from_imported_post_id import_post_id
if post_id
post = Post . find post_id
return post . full_url if post
end
end
if import_topic_id
2015-01-29 15:06:30 +08:00
2016-04-01 05:33:25 +08:00
results = mysql_query ( " SELECT pid
FROM #{table_name 'forum_post'}
WHERE tid = #{import_topic_id} AND first
LIMIT 1 " )
2015-01-29 15:06:30 +08:00
2016-04-01 05:33:25 +08:00
return discuzx_link unless results . size > 0
2015-01-29 15:06:30 +08:00
2016-04-01 05:33:25 +08:00
linked_post_id = results . first [ 'pid' ]
lookup = topic_lookup_from_imported_post_id ( linked_post_id )
if lookup
return " #{ NEW_SITE_PREFIX } #{ lookup [ :url ] } "
else
return discuzx_link
end
2015-01-29 15:06:30 +08:00
end
2016-04-01 05:33:25 +08:00
if import_category_id
category_id = category_id_from_imported_category_id import_category_id
if category_id
category = Category . find category_id
return category . url if category
end
end
case action
when 'index'
return " #{ NEW_SITE_PREFIX } / "
when 'stat' , 'stats' , 'ranklist'
return " #{ NEW_SITE_PREFIX } /users "
end
discuzx_link
2015-01-29 15:06:30 +08:00
end
def pm_url_regexp
@pm_url_regexp || = Regexp . new ( " http(?:s)?:// #{ ORIGINAL_SITE_PREFIX . gsub ( '.' , '\.' ) } /forum \\ .php \\ ?mod=redirect&goto=findpost&pid= \\ d+&ptid=( \\ d+) " )
end
# This step is done separately because it can take multiple attempts to get right (because of
# missing files, wrong paths, authorized extensions, etc.).
def import_attachments
setting = AUTHORIZED_EXTENSIONS . join ( '|' )
SiteSetting . authorized_extensions = setting if setting != SiteSetting . authorized_extensions
attachment_regex = / \ [attach \ ]( \ d+) \ [ \/ attach \ ] /
2016-04-01 05:33:25 +08:00
attachment_link_regex = / \ [x-attach \ ](.+) \ [ \/ x-attach \ ] /
2015-01-29 15:06:30 +08:00
current_count = 0
total_count = mysql_query ( " SELECT count(*) count FROM #{ table_name 'forum_post' } ; " ) . first [ 'count' ]
success_count = 0
fail_count = 0
puts '' , " Importing attachments... " , ''
Post . find_each do | post |
2016-04-01 05:33:25 +08:00
next unless post . custom_fields [ 'import_id' ] == post . custom_fields [ 'import_id' ] . to_i . to_s
user = post . user
2015-01-29 15:06:30 +08:00
current_count += 1
print_status current_count , total_count
new_raw = post . raw . dup
2016-04-01 05:33:25 +08:00
inline_attachments = [ ]
2015-01-29 15:06:30 +08:00
new_raw . gsub! ( attachment_regex ) do | s |
2016-04-01 05:33:25 +08:00
attachment_id = $1 . to_i
inline_attachments . push attachment_id
2015-01-29 15:06:30 +08:00
upload , filename = find_upload ( user , post , attachment_id )
unless upload
fail_count += 1
next
end
html_for_upload ( upload , filename )
end
2016-04-01 05:33:25 +08:00
new_raw . gsub! ( attachment_link_regex ) do | s |
attachment_file = $1
filename = File . basename ( attachment_file )
upload = create_upload ( user . id , File . join ( DISCUZX_BASE_DIR , attachment_file ) , filename )
unless upload
fail_count += 1
next
end
html_for_upload ( upload , filename )
end
sql = " SELECT aid
FROM #{table_name 'forum_attachment'}
WHERE pid = #{post.custom_fields['import_id']}"
if ! inline_attachments . empty?
sql << " AND aid NOT IN ( #{ inline_attachments . join ( ',' ) } ) "
end
results = mysql_query ( sql )
results . each do | attachment |
attachment_id = attachment [ 'aid' ]
upload , filename = find_upload ( user , post , attachment_id )
unless upload
fail_count += 1
next
end
html = html_for_upload ( upload , filename )
unless new_raw . include? html
new_raw << " \n "
new_raw << html
end
end
2015-01-29 15:06:30 +08:00
if new_raw != post . raw
PostRevisor . new ( post ) . revise! ( post . user , { raw : new_raw } , { bypass_bump : true , edit_reason : '从 Discuz 中导入附件' } )
end
success_count += 1
end
puts '' , ''
puts " succeeded: #{ success_count } "
puts " failed: #{ fail_count } " if fail_count > 0
puts ''
end
# Create the full path to the discuz avatar specified from user id
2016-04-01 05:33:25 +08:00
def discuzx_avatar_fullpath ( user_id , absolute = true )
2015-01-29 15:06:30 +08:00
padded_id = user_id . to_s . rjust ( 9 , '0' )
part_1 = padded_id [ 0 .. 2 ]
part_2 = padded_id [ 3 .. 4 ]
part_3 = padded_id [ 5 .. 6 ]
part_4 = padded_id [ - 2 .. - 1 ]
file_name = " #{ part_4 } _avatar_big.jpg "
2016-04-01 05:33:25 +08:00
if absolute
return File . join ( DISCUZX_BASE_DIR , AVATAR_DIR , part_1 , part_2 , part_3 , file_name ) , file_name
else
return File . join ( AVATAR_DIR , part_1 , part_2 , part_3 , file_name ) , file_name
end
2015-01-29 15:06:30 +08:00
end
# post id is in the quote block
def find_post_id_by_quote_number ( raw )
2016-04-01 05:33:25 +08:00
case raw
when / \ [url=forum.php \ ?mod=redirect&goto=findpost&pid=( \ d+)&ptid= \ d+ \ ] / #standard
$1
when / \ [url=https?: \/ \/ #{ ORIGINAL_SITE_PREFIX } \/ redirect.php \ ?goto=findpost&pid=( \ d+)&ptid= \ d+ \ ] / # old discuz 7 format
$1
when / \ [quote \ ][ \ S \ s]*pid=( \ d+)[ \ S \ s]* \ [ \/ quote \ ] / # quote
$1
end
2015-01-29 15:06:30 +08:00
end
# for some reason, discuz inlined some png file
# the corresponding image stored is broken in a way
def upload_inline_image ( data )
return unless data
puts 'Creating inline image'
encoded_photo = data [ 'data:image/png;base64,' . length .. - 1 ]
if encoded_photo
raw_file = Base64 . decode64 ( encoded_photo )
else
puts 'Error parsed inline photo' , data [ 0 .. 20 ]
return
end
real_filename = " #{ SecureRandom . hex } .png "
filename = Tempfile . new ( [ 'inline' , '.png' ] )
begin
filename . binmode
filename . write ( raw_file )
filename . rewind
upload = create_upload ( Discourse :: SYSTEM_USER_ID , filename , real_filename )
ensure
filename . close rescue nil
filename . unlink rescue nil
end
if upload . nil? || ! upload . valid?
puts " Upload not valid :( "
puts upload . errors . inspect if upload
return nil
end
return upload , real_filename
end
# find the uploaded file and real name from the db
def find_upload ( user , post , upload_id )
attachment_table = table_name 'forum_attachment'
# search for table id
sql = " SELECT a.pid post_id,
a . aid upload_id ,
a . tableid table_id
FROM #{attachment_table} a
WHERE a . pid = #{post.custom_fields['import_id']}
AND a . aid = #{upload_id};"
results = mysql_query ( sql )
unless ( meta_data = results . first )
puts " Couldn't find forum_attachment record meta data for post.id = #{ post . id } , import_id = #{ post . custom_fields [ 'import_id' ] } "
return nil
end
# search for uploaded file meta data
sql = " SELECT a.pid post_id,
a . aid upload_id ,
a . tid topic_id ,
a . uid user_id ,
a . dateline uploaded_time ,
a . filename real_filename ,
a . attachment attachment_path ,
a . remote is_remote ,
a . description description ,
a . isimage is_image ,
a . thumb is_thumb
FROM #{attachment_table}_#{meta_data['table_id']} a
WHERE a . aid = #{upload_id};"
results = mysql_query ( sql )
unless ( row = results . first )
puts " Couldn't find attachment record for post.id = #{ post . id } , import_id = #{ post . custom_fields [ 'import_id' ] } "
return nil
end
filename = File . join ( DISCUZX_BASE_DIR , ATTACHMENT_DIR , row [ 'attachment_path' ] )
unless File . exists? ( filename )
puts " Attachment file doesn't exist: #{ filename } "
return nil
end
real_filename = row [ 'real_filename' ]
real_filename . prepend SecureRandom . hex if real_filename [ 0 ] == '.'
upload = create_upload ( user . id , filename , real_filename )
if upload . nil? || ! upload . valid?
puts " Upload not valid :( "
puts upload . errors . inspect if upload
return nil
end
return upload , real_filename
rescue Mysql2 :: Error = > e
puts " SQL Error "
puts e . message
puts sql
return nil
end
2016-04-01 05:33:25 +08:00
def first_exists ( * items )
items . find { | item | ! item . blank? } || ''
end
2015-01-29 15:06:30 +08:00
def mysql_query ( sql )
@client . query ( sql , cache_rows : false )
end
end
ImportScripts :: DiscuzX . new . perform