2015-01-29 15:06:30 +08:00
# encoding: utf-8
2019-05-03 08:17:27 +10:00
# frozen_string_literal: true
2015-01-29 15:06:30 +08:00
# Author: Erick Guan <fantasticfears@gmail.com>
# This script imports data from the latest Discuz! X.
# It should work with any Discuz! X3.x release.
# It has only been tested against Simplified Chinese Discuz! X instances.
# If you need to import data in another language, email me.
2023-01-07 11:53:14 +00:00
require " php_serialize "
require " miro "
require " mysql2 "
2015-03-18 20:30:42 +01:00
require File . expand_path ( File . dirname ( __FILE__ ) + " /base.rb " )
2015-01-29 15:06:30 +08:00
class ImportScripts :: DiscuzX < ImportScripts :: Base
DISCUZX_DB = " ultrax "
2023-01-07 11:53:14 +00:00
DB_TABLE_PREFIX = " pre_ "
2015-01-29 15:06:30 +08:00
ORIGINAL_SITE_PREFIX = " oldsite.example.com/forums " # without http(s)://
2023-01-07 11:53:14 +00:00
NEW_SITE_PREFIX = " http://discourse.example.com " # with http:// or https://
2015-01-29 15:06:30 +08:00
# Set DISCUZX_BASE_DIR to the base directory of your discuz installation.
2023-01-07 11:53:14 +00:00
DISCUZX_BASE_DIR = " /var/www/discuz/upload "
AVATAR_DIR = " /uc_server/data/avatar "
ATTACHMENT_DIR = " /data/attachment/forum "
AUTHORIZED_EXTENSIONS = %w[ jpg jpeg png gif zip rar pdf ]
2015-01-29 15:06:30 +08:00
# Sets up the importer state:
#   * @client - a Mysql2 client for the DISCUZX_DB database on localhost, user root.
#   * @first_post_id_by_topic_id - starts as an empty hash.
#   * @internal_url_regexps - patterns for old-site URLs under ORIGINAL_SITE_PREFIX
#     (viewthread, findpost redirects, forumdisplay, index/stats/misc pages) with
#     named captures tid, pid, fid, action and mod.
# NOTE(review): in this listing the Mysql2::Client.new call and the array literal
# are not closed, and no call to the parent initializer is visible - confirm
# against the repository copy before editing.
# NOTE(review): `(?:&|&)` repeats the same alternative; presumably one side was
# an HTML-escaped ampersand originally - confirm.
def initialize
2023-01-07 11:53:14 +00:00
@client =
Mysql2 :: Client . new (
host : " localhost " ,
username : " root " ,
#password: "password",
database : DISCUZX_DB ,
2015-01-29 15:06:30 +08:00
@first_post_id_by_topic_id = { }
2016-01-26 17:07:54 +01:00
@internal_url_regexps = [
2023-01-07 11:53:14 +00:00
%r{ http(?:s)?:// #{ ORIGINAL_SITE_PREFIX . gsub ( " . " , '\.' ) } /forum \ .php \ ?mod=viewthread(?:&|&)tid=(?<tid> \ d+)(?:[^ \ [ \ ] \ s]*)(?:pid=?(?<pid> \ d+))?(?:[^ \ [ \ ] \ s]*) } ,
%r{ http(?:s)?:// #{ ORIGINAL_SITE_PREFIX . gsub ( " . " , '\.' ) } /viewthread \ .php \ ?tid=(?<tid> \ d+)(?:[^ \ [ \ ] \ s]*)(?:pid=?(?<pid> \ d+))?(?:[^ \ [ \ ] \ s]*) } ,
%r{ http(?:s)?:// #{ ORIGINAL_SITE_PREFIX . gsub ( " . " , '\.' ) } /forum \ .php \ ?mod=redirect(?:&|&)goto=findpost(?:&|&)pid=(?<pid> \ d+)(?:&|&)ptid=(?<tid> \ d+)(?:[^ \ [ \ ] \ s]*) } ,
%r{ http(?:s)?:// #{ ORIGINAL_SITE_PREFIX . gsub ( " . " , '\.' ) } /redirect \ .php \ ?goto=findpost(?:&|&)pid=(?<pid> \ d+)(?:&|&)ptid=(?<tid> \ d+)(?:[^ \ [ \ ] \ s]*) } ,
%r{ http(?:s)?:// #{ ORIGINAL_SITE_PREFIX . gsub ( " . " , '\.' ) } /forumdisplay \ .php \ ?fid=(?<fid> \ d+)(?:[^ \ [ \ ] \ s]*) } ,
%r{ http(?:s)?:// #{ ORIGINAL_SITE_PREFIX . gsub ( " . " , '\.' ) } /forum \ .php \ ?mod=forumdisplay(?:&|&)fid=(?<fid> \ d+)(?:[^ \ [ \ ] \ s]*) } ,
%r{ http(?:s)?:// #{ ORIGINAL_SITE_PREFIX . gsub ( " . " , '\.' ) } /(?<action>index) \ .php(?:[^ \ [ \ ] \ s]*) } ,
%r{ http(?:s)?:// #{ ORIGINAL_SITE_PREFIX . gsub ( " . " , '\.' ) } /(?<action>stats) \ .php(?:[^ \ [ \ ] \ s]*) } ,
%r{ http(?:s)?:// #{ ORIGINAL_SITE_PREFIX . gsub ( " . " , '\.' ) } /misc.php \ ?mod=(?<mod>stat|ranklist)(?:[^ \ [ \ ] \ s]*) } ,
2016-01-26 17:07:54 +01:00
2015-01-29 15:06:30 +08:00
def execute
2016-01-26 17:07:54 +01:00
2015-01-29 15:06:30 +08:00
# add the prefix to the table name
def table_name ( name = nil )
# Find which Discuz! user groups should be granted staff roles.
#
# Reads every row of the `common_usergroup` table and fills two instance
# variables that import_users checks against each user's group id:
#   @admin_group_id     - ids of groups whose radminid is 1
#   @moderator_group_id - ids of groups whose radminid is 2 or 3
#
# Returns the query result (the value of the final `each`); callers are not
# expected to use it.
def get_knowledge_about_group
  group_table = table_name "common_usergroup"
  result =
    mysql_query(
      "SELECT groupid group_id, radminid role_id
         FROM #{group_table};",
    )

  @moderator_group_id = []
  @admin_group_id = []
  # @banned_group_id = [4, 5] # Banned users and their posts are not imported; empty this array if you want to import them.

  result.each do |group|
    case group["role_id"]
    when 1 # administrator
      @admin_group_id << group["group_id"]
    when 2, 3 # super moderator, moderator; drop the 3 if ordinary moderators should not become Discourse moderators
      @moderator_group_id << group["group_id"]
    end
  end
end
2016-01-26 17:07:54 +01:00
# Load the custom forum slugs configured in Discuz!.
#
# Reads the `forumkeys` row of the `common_setting` table, PHP-unserializes its
# value and stores every non-blank slug in @category_slug, keyed by the
# imported forum id. @category_slug is left empty when the setting is missing
# or blank. import_categories reads this hash to set each category's slug.
def get_knowledge_about_category_slug
  @category_slug = {}

  results =
    mysql_query(
      "SELECT svalue value
         FROM #{table_name "common_setting"}
        WHERE skey = 'forumkeys'",
    )
  return if results.size < 1

  value = results.first["value"]
  return if value.blank?

  PHP
    .unserialize(value)
    .each do |category_import_id, slug|
      next if slug.blank?
      @category_slug[category_import_id] = slug
    end
end
2016-01-26 17:07:54 +01:00
# Map Discuz! accounts that share an e-mail address onto a single account.
#
# For every e-mail used by more than one member, the member with the lowest
# uid is treated as the canonical account. Each other member's uid is recorded
# in @duplicated_email (duplicate uid => canonical uid). When the canonical
# account is already present in the lookup's user map, the duplicate uid is
# pointed at the same Discourse user id, so later lookups by the duplicate's
# import id resolve to that user.
def get_knowledge_about_duplicated_email
  @duplicated_email = {}

  # Go through table_name like every other query so DB_TABLE_PREFIX is honoured.
  member_table = table_name "common_member"

  # MIN(uid) keeps the subquery valid under ONLY_FULL_GROUP_BY and makes the
  # choice of canonical account deterministic.
  results =
    mysql_query(
      "SELECT a.uid uid, b.uid import_id
         FROM #{member_table} a
         JOIN (
                SELECT MIN(uid) uid, email
                  FROM #{member_table}
                 GROUP BY email
                HAVING COUNT(email) > 1
              ) b USING(email)
        WHERE a.uid != b.uid",
    )

  # The lookup object does not expose its user map, so read it directly.
  users = @lookup.instance_variable_get :@users

  results.each do |row|
    @duplicated_email[row["uid"]] = row["import_id"]
    user_id = users[row["import_id"]]
    users[row["uid"]] = user_id if user_id
  end
end
2015-01-29 15:06:30 +08:00
# Imports Discuz! members as Discourse users.
#
# Counts the rows of common_member, then in batches of BATCH_SIZE selects each
# member joined (LEFT JOIN ... USING(uid)) with the ucenter_members, profile,
# status, field_forum and field_home tables, and hands the rows to create_users.
# The hash built per user carries identity, the imported password hash and salt,
# timestamps/IPs, moderator/admin flags derived from @moderator_group_id and
# @admin_group_id, website, bio_raw, location, and a post_create_action lambda.
# That lambda uploads the avatar when avatar_exists is 1, extracts background
# image URLs from the user's spacecss to upload as profile/card backgrounds,
# updates email_digests from email_confirmed, and blanks a name that equals the
# username.
#
# NOTE(review): several lines of this method are missing from this listing
# (closing delimiters, else/begin keywords, the LIMIT clause, the bio slice
# length) - compare with the repository copy before changing code here.
# NOTE(review): the query aliases p.resideprovince as birthprovince, so the
# birth province is never read; p.birthprovince looks intended - confirm.
# NOTE(review): the qq check compares user["qq"].strip (a String) with
# user["qq"].strip.to_i (an Integer) using ==, which is never true, so the
# qzone URL branch cannot be taken as written.
def import_users
2023-01-07 11:53:14 +00:00
puts " " , " creating users "
2015-01-29 15:06:30 +08:00
2023-01-07 11:53:14 +00:00
sensitive_user_table = table_name " ucenter_members "
user_table = table_name " common_member "
profile_table = table_name " common_member_profile "
status_table = table_name " common_member_status "
forum_table = table_name " common_member_field_forum "
home_table = table_name " common_member_field_home "
total_count = mysql_query ( " SELECT count(*) count FROM #{ user_table } ; " ) . first [ " count " ]
2015-01-29 15:06:30 +08:00
batches ( BATCH_SIZE ) do | offset |
2023-01-07 11:53:14 +00:00
results =
mysql_query (
" SELECT u.uid id, u.username username, u.email email, u.groupid group_id,
2016-01-26 17:07:54 +01:00
su . regdate regdate , su . password password_hash , su . salt salt ,
s . regip regip , s . lastip last_visit_ip , s . lastvisit last_visit_time , s . lastpost last_posted_at , s . lastsendmail last_emailed_at ,
u . emailstatus email_confirmed , u . avatarstatus avatar_exists ,
p . site website , p . address address , p . bio bio , p . realname realname , p . qq qq ,
p . resideprovince resideprovince , p . residecity residecity , p . residedist residedist , p . residecommunity residecommunity ,
p . resideprovince birthprovince , p . birthcity birthcity , p . birthdist birthdist , p . birthcommunity birthcommunity ,
h . spacecss spacecss , h . spacenote spacenote ,
f . customstatus customstatus , f . sightml sightml
2015-01-29 15:06:30 +08:00
FROM #{user_table} u
2016-01-26 17:07:54 +01:00
LEFT JOIN #{sensitive_user_table} su USING(uid)
LEFT JOIN #{profile_table} p USING(uid)
LEFT JOIN #{status_table} s USING(uid)
LEFT JOIN #{forum_table} f USING(uid)
LEFT JOIN #{home_table} h USING(uid)
2015-01-29 15:06:30 +08:00
ORDER BY u . uid ASC
2023-01-07 11:53:14 +00:00
OFFSET #{offset};",
2015-01-29 15:06:30 +08:00
break if results . size < 1
2021-05-20 21:43:47 -04:00
# TODO: breaks the script reported by some users
2016-01-26 17:07:54 +01:00
# next if all_records_exist? :users, users.map {|u| u["id"].to_i}
2015-09-21 16:48:42 -07:00
2015-01-29 15:06:30 +08:00
create_users ( results , total : total_count , offset : offset ) do | user |
2023-01-07 11:53:14 +00:00
id : user [ " id " ] ,
email : user [ " email " ] ,
username : user [ " username " ] ,
name : first_exists ( user [ " realname " ] , user [ " customstatus " ] , user [ " username " ] ) ,
import_pass : user [ " password_hash " ] ,
2016-01-26 17:07:54 +01:00
active : true ,
2023-01-07 11:53:14 +00:00
salt : user [ " salt " ] ,
2016-01-26 17:07:54 +01:00
# TODO: title: user['customstatus'], # move custom title to name since discourse can't let user custom title https://meta.discourse.org/t/let-users-custom-their-title/37626
2023-01-07 11:53:14 +00:00
created_at : user [ " regdate " ] ? Time . zone . at ( user [ " regdate " ] ) : nil ,
registration_ip_address : user [ " regip " ] ,
ip_address : user [ " last_visit_ip " ] ,
last_seen_at : user [ " last_visit_time " ] ,
last_emailed_at : user [ " last_emailed_at " ] ,
last_posted_at : user [ " last_posted_at " ] ,
moderator : @moderator_group_id . include? ( user [ " group_id " ] ) ,
admin : @admin_group_id . include? ( user [ " group_id " ] ) ,
website :
( user [ " website " ] && user [ " website " ] . include? ( " . " ) ) ?
user [ " website " ] . strip :
if (
user [ " qq " ] && user [ " qq " ] . strip == ( user [ " qq " ] . strip . to_i ) &&
user [ " qq " ] . strip . to_i > ( 10_000 )
" http://user.qzone.qq.com/ " + user [ " qq " ] . strip
end ,
bio_raw :
first_exists (
( user [ " bio " ] && CGI . unescapeHTML ( user [ " bio " ] ) ) ,
user [ " sightml " ] ,
user [ " spacenote " ] ,
) . strip [
0 ,
] ,
location :
first_exists (
user [ " address " ] ,
if ! user [ " resideprovince " ] . blank?
user [ " resideprovince " ] ,
user [ " residecity " ] ,
user [ " residedist " ] ,
user [ " residecommunity " ] ,
user [ " birthprovince " ] ,
user [ " birthcity " ] ,
user [ " birthdist " ] ,
user [ " birthcommunity " ] ,
) . reject { | location | location . blank? } . join ( " " ) ,
) ,
post_create_action :
lambda do | newmember |
if user [ " avatar_exists " ] == ( 1 ) && newmember . uploaded_avatar_id . blank?
path , filename = discuzx_avatar_fullpath ( user [ " id " ] )
if path
upload = create_upload ( newmember . id , path , filename )
if ! upload . nil? && upload . persisted?
newmember . import_mode = false
newmember . create_user_avatar
newmember . import_mode = true
newmember . user_avatar . update ( custom_upload_id : upload . id )
newmember . update ( uploaded_avatar_id : upload . id )
puts " Error: Upload did not persist! "
rescue SystemCallError = > err
puts " Could not import avatar: #{ err . message } "
2015-01-29 15:06:30 +08:00
2023-01-07 11:53:14 +00:00
if ! user [ " spacecss " ] . blank? && newmember . user_profile . profile_background_upload . blank?
# profile background
if matched = user [ " spacecss " ] . match ( / body \ s*{[^}]*url \ ('?(.+?)'? \ ) /i )
body_background = matched [ 1 ] . split ( ORIGINAL_SITE_PREFIX , 2 ) . last
2016-01-26 17:07:54 +01:00
2023-01-07 11:53:14 +00:00
if matched = user [ " spacecss " ] . match ( / # hd \ s*{[^}]*url \ ('?(.+?)'? \ ) /i )
header_background = matched [ 1 ] . split ( ORIGINAL_SITE_PREFIX , 2 ) . last
if matched = user [ " spacecss " ] . match ( / .blocktitle \ s*{[^}]*url \ ('?(.+?)'? \ ) /i )
blocktitle_background = matched [ 1 ] . split ( ORIGINAL_SITE_PREFIX , 2 ) . last
if matched = user [ " spacecss " ] . match ( / # ct \ s*{[^}]*url \ ('?(.+?)'? \ ) /i )
content_background = matched [ 1 ] . split ( ORIGINAL_SITE_PREFIX , 2 ) . last
if body_background || header_background || blocktitle_background ||
profile_background =
first_exists (
header_background ,
body_background ,
content_background ,
blocktitle_background ,
card_background =
first_exists (
content_background ,
body_background ,
header_background ,
blocktitle_background ,
upload =
create_upload (
newmember . id ,
File . join ( DISCUZX_BASE_DIR , profile_background ) ,
File . basename ( profile_background ) ,
if upload
newmember . user_profile . upload_profile_background upload
puts " WARNING: #{ user [ " username " ] } (UID: #{ user [ " id " ] } ) profile_background file did not persist! "
upload =
create_upload (
newmember . id ,
File . join ( DISCUZX_BASE_DIR , card_background ) ,
File . basename ( card_background ) ,
if upload
newmember . user_profile . upload_card_background upload
puts " WARNING: #{ user [ " username " ] } (UID: #{ user [ " id " ] } ) card_background file did not persist! "
2016-01-26 17:07:54 +01:00
2015-01-29 15:06:30 +08:00
2023-01-07 11:53:14 +00:00
# we don't send email to the unconfirmed user
if newmember . email_digests
newmember . update ( email_digests : user [ " email_confirmed " ] == 1 )
if ! newmember . name . blank? && newmember . name == ( newmember . username )
newmember . update ( name : " " )
end ,
2015-01-29 15:06:30 +08:00
# Imports Discuz! forums as Discourse categories.
#
# Selects every forum_forum row LEFT JOINed with forum_forumfield, ordered by
# parent id then id, and passes them to create_categories. Rows whose type is
# "group" or whose status is 2 are skipped. The name colour is taken from the
# PHP-serialized `extra` column when it has a namecolor. Each category's
# position is offset by the current maximum category position. The
# post_create_action lambda applies a slug from @category_slug, rewrites the
# category topic's first post from the forum rules, uploads the forum icon as
# the category logo (deriving a colour from it via Miro when none was set), and
# adds the category to SiteSetting.default_categories_muted when status is 0 or
# 3. parent_category_id is set when parent_id is positive.
#
# NOTE(review): lines are missing from this listing (closing delimiters, the
# opening quote of the SQL string, begin/else keywords) - compare with the
# repository copy before changing code here.
# NOTE(review): Category.all.max_by(&:position).position is evaluated again
# inside the per-row block and its result is discarded.
# NOTE(review): row["icon"].empty? would raise if icon is nil; the LEFT JOIN
# presumably can yield NULL here - confirm.
def import_categories
2023-01-07 11:53:14 +00:00
puts " " , " creating categories "
2015-01-29 15:06:30 +08:00
2016-01-26 17:07:54 +01:00
2023-01-07 11:53:14 +00:00
forums_table = table_name " forum_forum "
forums_data_table = table_name " forum_forumfield "
2015-01-29 15:06:30 +08:00
2023-01-07 11:53:14 +00:00
results =
mysql_query (
2015-01-29 15:06:30 +08:00
SELECT f . fid id , f . fup parent_id , f . name , f . type type , f . status status , f . displayorder position ,
2016-01-26 17:07:54 +01:00
d . description description , d . rules rules , d . icon , d . extra extra
2015-01-29 15:06:30 +08:00
FROM #{forums_table} f
2016-01-26 17:07:54 +01:00
LEFT JOIN #{forums_data_table} d USING(fid)
2015-01-29 15:06:30 +08:00
ORDER BY parent_id ASC , id ASC
2023-01-07 11:53:14 +00:00
" ,
2015-01-29 15:06:30 +08:00
max_position = Category . all . max_by ( & :position ) . position
create_categories ( results ) do | row |
2023-01-07 11:53:14 +00:00
next if row [ " type " ] == ( " group " ) || row [ " status " ] == ( 2 ) # or row['status'].to_i == 3 # 如果不想导入群组,取消注释
extra = PHP . unserialize ( row [ " extra " ] ) if ! row [ " extra " ] . blank?
color = extra [ " namecolor " ] [ 1 , 6 ] if extra && ! extra [ " namecolor " ] . blank?
2015-01-29 15:06:30 +08:00
Category . all . max_by ( & :position ) . position
2016-01-26 17:07:54 +01:00
2015-01-29 15:06:30 +08:00
h = {
2023-01-07 11:53:14 +00:00
id : row [ " id " ] ,
name : row [ " name " ] ,
description : row [ " description " ] ,
position : row [ " position " ] . to_i + max_position ,
2016-01-26 17:07:54 +01:00
color : color ,
2023-01-07 11:53:14 +00:00
post_create_action :
lambda do | category |
if slug = @category_slug [ row [ " id " ] ]
category . update ( slug : slug )
2016-01-26 17:07:54 +01:00
2023-01-07 11:53:14 +00:00
raw = process_discuzx_post ( row [ " rules " ] , nil )
if @bbcode_to_md
raw =
raw . bbcode_to_md ( false )
rescue StandardError
category . topic . posts . first . update_attribute ( :raw , raw )
if ! row [ " icon " ] . empty?
upload =
create_upload (
Discourse :: SYSTEM_USER_ID ,
File . join ( DISCUZX_BASE_DIR , ATTACHMENT_DIR , " ../common " , row [ " icon " ] ) ,
File . basename ( row [ " icon " ] ) ,
if upload
category . uploaded_logo_id = upload . id
# FIXME: I don't know how to get '/shared' by script. May change to Rails.root
category . color =
Miro :: DominantColors . new ( File . join ( " /shared " , upload . url ) ) . to_hex . first [
1 ,
] if ! color
category . save!
2016-01-26 17:07:54 +01:00
2019-11-18 12:28:35 +05:30
2023-01-07 11:53:14 +00:00
if row [ " status " ] == ( 0 ) || row [ " status " ] == ( 3 )
SiteSetting . default_categories_muted = [
SiteSetting . default_categories_muted ,
category . id ,
] . reject ( & :blank? ) . join ( " | " )
end ,
2015-01-29 15:06:30 +08:00
2023-01-07 11:53:14 +00:00
if row [ " parent_id " ] . to_i > 0
h [ :parent_category_id ] = category_id_from_imported_category_id ( row [ " parent_id " ] )
2015-01-29 15:06:30 +08:00
# Imports Discuz! threads and posts as Discourse topics and posts.
#
# Counts forum_post rows, then in batches selects each post joined with its
# thread and with the thread's first post (p2.first), and passes the rows to
# create_posts. A row whose id equals first_id becomes a topic: it gets a
# category and title, and, when the thread's `special` flag is 1, a [poll]
# block built from the forum_poll / forum_polloption tables is prepended.
# Other rows become replies: the parent topic is looked up from first_id, and
# reply_to_post_number is set when the quoted post resolves to a post in the
# same topic. Posts with status bit 1 set or blank raw are deleted after
# creation; posts with status bit 2 set are flagged via
# PostActionCreator.notify_user. The block returns nil for skipped rows.
#
# NOTE(review): lines are missing from this listing (closing delimiters, SQL
# opening quotes, else keywords, the LIMIT clause) - compare with the
# repository copy before changing code here.
# NOTE(review): mapped[:tags] reads m["tags"], but the query selects no `tags`
# column, so it is always nil as written.
# NOTE(review): poll falls back to {} when no forum_poll row exists; in that
# case poll["maxchoices"] > 0 calls > on nil and raises NoMethodError.
# NOTE(review): `where t.tid < 10000` is a hard-coded filter; it looks like a
# leftover from testing - confirm it is intended.
# NOTE(review): the poll lookups assign to `results`, the same local that
# holds the batch rows in the enclosing block.
def import_posts
puts " " , " creating topics and posts "
2023-01-07 11:53:14 +00:00
users_table = table_name " common_member "
posts_table = table_name " forum_post "
topics_table = table_name " forum_thread "
2015-01-29 15:06:30 +08:00
2023-01-07 11:53:14 +00:00
total_count = mysql_query ( " SELECT count(*) count FROM #{ posts_table } " ) . first [ " count " ]
2015-01-29 15:06:30 +08:00
batches ( BATCH_SIZE ) do | offset |
2023-01-07 11:53:14 +00:00
results =
mysql_query (
2015-01-29 15:06:30 +08:00
SELECT p . pid id ,
p . tid topic_id ,
t . fid category_id ,
t . subject title ,
p . authorid user_id ,
p . message raw ,
p . dateline post_time ,
2016-01-26 17:07:54 +01:00
p2 . pid first_id ,
p . invisible status ,
t . special special
FROM #{posts_table} p
JOIN #{posts_table} p2 ON p2.first AND p2.tid = p.tid
JOIN #{topics_table} t ON t.tid = p.tid
where t . tid < 10000
2015-01-29 15:06:30 +08:00
ORDER BY id ASC , topic_id ASC
OFFSET #{offset};
2023-01-07 11:53:14 +00:00
" ,
2016-01-26 17:07:54 +01:00
# u.status != -1 AND u.groupid != 4 AND u.groupid != 5 用户未被锁定、禁访或禁言。在现实中的 Discuz 论坛,禁止的用户通常是广告机或驱逐的用户,这些不需要导入。
2015-01-29 15:06:30 +08:00
break if results . size < 1
2015-09-21 16:48:42 -07:00
next if all_records_exist? :posts , results . map { | p | p [ " id " ] . to_i }
2015-01-29 15:06:30 +08:00
create_posts ( results , total : total_count , offset : offset ) do | m |
skip = false
mapped = { }
2023-01-07 11:53:14 +00:00
mapped [ :id ] = m [ " id " ]
mapped [ :user_id ] = user_id_from_imported_user_id ( m [ " user_id " ] ) || - 1
mapped [ :raw ] = process_discuzx_post ( m [ " raw " ] , m [ " id " ] )
mapped [ :created_at ] = Time . zone . at ( m [ " post_time " ] )
mapped [ :tags ] = m [ " tags " ]
2015-01-29 15:06:30 +08:00
2023-01-07 11:53:14 +00:00
if m [ " id " ] == m [ " first_id " ]
mapped [ :category ] = category_id_from_imported_category_id ( m [ " category_id " ] )
mapped [ :title ] = CGI . unescapeHTML ( m [ " title " ] )
2016-01-26 17:07:54 +01:00
2023-01-07 11:53:14 +00:00
if m [ " special " ] == 1
results =
mysql_query (
2016-01-26 17:07:54 +01:00
SELECT multiple , maxchoices
2023-01-07 11:53:14 +00:00
FROM #{table_name "forum_poll"}
WHERE tid = #{m["topic_id"]}",
2016-01-26 17:07:54 +01:00
poll = results . first || { }
2023-01-07 11:53:14 +00:00
results =
mysql_query (
2016-01-26 17:07:54 +01:00
SELECT polloption
2023-01-07 11:53:14 +00:00
FROM #{table_name "forum_polloption"}
WHERE tid = #{m["topic_id"]}
ORDER BY displayorder " ,
2016-01-26 17:07:54 +01:00
if results . empty?
2023-01-07 11:53:14 +00:00
puts " WARNING: can't find poll options for topic #{ m [ " topic_id " ] } , skip poll "
2016-01-26 17:07:54 +01:00
2023-01-07 11:53:14 +00:00
mapped [
] . prepend " [poll #{ poll [ " multiple " ] ? " type=multiple " : " " } #{ poll [ " maxchoices " ] > 0 ? " max= #{ poll [ " maxchoices " ] } " : " " } ] \n #{ results . map { | option | " - " + option [ " polloption " ] } . join ( " \n " ) } \n [/poll] \n "
2016-01-26 17:07:54 +01:00
2015-01-29 15:06:30 +08:00
2023-01-07 11:53:14 +00:00
parent = topic_lookup_from_imported_post_id ( m [ " first_id " ] )
2015-01-29 15:06:30 +08:00
if parent
mapped [ :topic_id ] = parent [ :topic_id ]
2023-01-07 11:53:14 +00:00
reply_post_import_id = find_post_id_by_quote_number ( m [ " raw " ] )
2016-01-26 17:07:54 +01:00
if reply_post_import_id
post_id = post_id_from_imported_post_id ( reply_post_import_id . to_i )
if ( post = Post . find_by ( id : post_id ) )
if post . topic_id == mapped [ :topic_id ]
mapped [ :reply_to_post_number ] = post . post_number
2023-01-07 11:53:14 +00:00
puts " post #{ m [ " id " ] } reply to another topic, skip reply "
2016-01-26 17:07:54 +01:00
2023-01-07 11:53:14 +00:00
puts " post #{ m [ " id " ] } reply to not exists post #{ reply_post_import_id } , skip reply "
2016-01-26 17:07:54 +01:00
2015-01-29 15:06:30 +08:00
2023-01-07 11:53:14 +00:00
puts " Parent topic #{ m [ " topic_id " ] } doesn't exist. Skipping #{ m [ " id " ] } : #{ m [ " title " ] [ 0 .. 40 ] } "
2015-01-29 15:06:30 +08:00
skip = true
2023-01-07 11:53:14 +00:00
if m [ " status " ] & 1 == 1 || mapped [ :raw ] . blank?
2018-09-04 10:16:21 +08:00
mapped [ :post_create_action ] = lambda do | action_post |
PostDestroyer . new ( Discourse . system_user , action_post ) . perform_delete
2015-01-29 15:06:30 +08:00
2023-01-07 11:53:14 +00:00
elsif ( m [ " status " ] & 2 ) >> 1 == 1 # waiting for approve
2018-09-04 10:16:21 +08:00
mapped [ :post_create_action ] = lambda do | action_post |
2019-01-03 12:03:01 -05:00
PostActionCreator . notify_user ( Discourse . system_user , action_post )
2015-01-29 15:06:30 +08:00
skip ? nil : mapped
2016-01-26 17:07:54 +01:00
# Imports Discuz! thread favourites as Discourse bookmarks.
#
# Counts the `home_favorite` rows whose idtype is 'tid', then pages through
# them in batches of BATCH_SIZE, joining each favourite to the first post of
# the favourited thread. Every row is handed to create_bookmarks as
# { user_id:, post_id: }, both still Discuz! import ids.
def import_bookmarks
  puts "", "creating bookmarks"

  favorites_table = table_name "home_favorite"
  posts_table = table_name "forum_post"

  total_count =
    mysql_query("SELECT count(*) count FROM #{favorites_table} WHERE idtype = 'tid'").first[
      "count"
    ]

  batches(BATCH_SIZE) do |offset|
    # LIMIT/OFFSET paging is only stable with an explicit ORDER BY; without
    # one MySQL may return rows in a different order per query, so batches
    # could skip or repeat favourites.
    results =
      mysql_query(
        "SELECT p.pid post_id, f.uid user_id
           FROM #{favorites_table} f
           JOIN #{posts_table} p ON f.id = p.tid
          WHERE f.idtype = 'tid' AND p.first
          ORDER BY p.pid ASC, f.uid ASC
          LIMIT #{BATCH_SIZE}
         OFFSET #{offset};",
      )

    break if results.size < 1

    # next if all_records_exist?

    create_bookmarks(results, total: total_count, offset: offset) do |row|
      { user_id: row["user_id"], post_id: row["post_id"] }
    end
  end
end
2016-01-26 17:07:54 +01:00
2015-01-29 15:06:30 +08:00
# Imports UCenter private messages as Discourse private-message topics/posts.
#
# Counts ucenter_pm_indexes rows, then in batches selects messages from the
# UNION of the ucenter_pm_messages_1 .. _9 tables ordered by thread then id,
# and passes them to create_posts with import ids of the form "pm:<id>".
# For the first message of a thread (per is_first_pm) the title is read from
# ucenter_pm_lists, the archetype is set to private message, and
# target_usernames is built from the thread's ucenter_pm_members rows; a
# thread with no resolvable targets is skipped. The first post's import id is
# remembered in @first_post_id_by_topic_id so later messages of the thread can
# find their parent topic; messages whose parent cannot be found are skipped.
#
# NOTE(review): lines are missing from this listing (closing delimiters, SQL
# opening quotes, else keywords, the LIMIT clause) - compare with the
# repository copy before changing code here.
# NOTE(review): the UNION starts at _1; presumably a _0 shard exists as well
# and is not being imported - confirm against the UCenter schema.
# NOTE(review): total_count comes from ucenter_pm_indexes while the rows come
# from the message tables; presumably these agree - confirm.
def import_private_messages
2023-01-07 11:53:14 +00:00
puts " " , " creating private messages "
2015-01-29 15:06:30 +08:00
2023-01-07 11:53:14 +00:00
pm_indexes = table_name " ucenter_pm_indexes "
pm_messages = table_name " ucenter_pm_messages "
total_count = mysql_query ( " SELECT count(*) count FROM #{ pm_indexes } " ) . first [ " count " ]
2015-01-29 15:06:30 +08:00
batches ( BATCH_SIZE ) do | offset |
2023-01-07 11:53:14 +00:00
results =
mysql_query (
2015-01-29 15:06:30 +08:00
SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_1
UNION SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_2
UNION SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_3
UNION SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_4
UNION SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_5
UNION SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_6
UNION SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_7
UNION SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_8
UNION SELECT pmid id , plid thread_id , authorid user_id , message , dateline created_at
FROM #{pm_messages}_9
ORDER BY thread_id ASC , id ASC
2023-01-07 11:53:14 +00:00
OFFSET #{offset};",
2015-01-29 15:06:30 +08:00
break if results . size < 1
2016-01-26 17:07:54 +01:00
# next if all_records_exist? :posts, results.map {|m| "pm:#{m['id']}"}
2015-09-21 16:48:42 -07:00
2015-01-29 15:06:30 +08:00
create_posts ( results , total : total_count , offset : offset ) do | m |
skip = false
mapped = { }
2023-01-07 11:53:14 +00:00
mapped [ :id ] = " pm: #{ m [ " id " ] } "
mapped [ :user_id ] = user_id_from_imported_user_id ( m [ " user_id " ] ) || - 1
mapped [ :raw ] = process_discuzx_post ( m [ " message " ] , m [ " id " ] )
mapped [ :created_at ] = Time . zone . at ( m [ " created_at " ] )
thread_id = " pm_ #{ m [ " thread_id " ] } "
2015-01-29 15:06:30 +08:00
2023-01-07 11:53:14 +00:00
if is_first_pm ( m [ " id " ] , m [ " thread_id " ] )
2015-01-29 15:06:30 +08:00
# find the title from list table
2023-01-07 11:53:14 +00:00
pm_thread =
mysql_query (
2015-01-29 15:06:30 +08:00
SELECT plid thread_id , subject
2023-01-07 11:53:14 +00:00
FROM #{table_name "ucenter_pm_lists"}
WHERE plid = #{m["thread_id"]};",
) . first
mapped [ :title ] = pm_thread [ " subject " ]
2015-01-29 15:06:30 +08:00
mapped [ :archetype ] = Archetype . private_message
# Find the users who are part of this private message.
2023-01-07 11:53:14 +00:00
import_user_ids =
mysql_query (
2015-01-29 15:06:30 +08:00
SELECT plid thread_id , uid user_id
2023-01-07 11:53:14 +00:00
FROM #{table_name "ucenter_pm_members"}
WHERE plid = #{m["thread_id"]};
" ,
) . map { | r | r [ " user_id " ] } . uniq
mapped [ :target_usernames ] = import_user_ids
. map! do | import_user_id |
if import_user_id . to_s == m [ " user_id " ] . to_s
User . find_by ( id : user_id_from_imported_user_id ( import_user_id ) ) . try ( :username )
. compact
2015-01-29 15:06:30 +08:00
if mapped [ :target_usernames ] . empty? # pm with yourself?
skip = true
2023-01-07 11:53:14 +00:00
puts " Skipping pm: #{ m [ " id " ] } due to no target "
2015-01-29 15:06:30 +08:00
@first_post_id_by_topic_id [ thread_id ] = mapped [ :id ]
parent = topic_lookup_from_imported_post_id ( @first_post_id_by_topic_id [ thread_id ] )
if parent
mapped [ :topic_id ] = parent [ :topic_id ]
puts " Parent post pm thread: #{ thread_id } doesn't exist. Skipping #{ m [ " id " ] } : #{ m [ " message " ] [ 0 .. 40 ] } "
skip = true
skip ? nil : mapped
# Tell whether a private message is the first one of its thread.
#
# pm_id     - the message id (pmid) to test; compared as a string.
# thread_id - the thread id (plid) the message belongs to.
#
# Looks up the lowest pmid recorded for the thread in `ucenter_pm_indexes`.
# Returns true when it equals pm_id, and false otherwise - including when the
# thread has no index rows at all.
def is_first_pm(pm_id, thread_id)
  # Only the lowest id is needed, so let the database return a single row.
  # to_i keeps the interpolated value a plain integer literal.
  result =
    mysql_query(
      "SELECT pmid id
         FROM #{table_name "ucenter_pm_indexes"}
        WHERE plid = #{thread_id.to_i}
        ORDER BY id
        LIMIT 1",
    )

  first_row = result.first
  return false if first_row.nil?

  first_row["id"].to_s == pm_id.to_s
end
2015-01-29 15:06:30 +08:00
2016-01-26 17:07:54 +01:00
# Replace every [img]...[/img] tag in a post with the HTML for an uploaded copy.
#
# raw - the post body; it is not modified.
#
# The text between the tags is passed to upload_inline_image. When that yields
# an upload, the tag is replaced by html_for_upload(upload, filename); when it
# does not, the tag is removed (the block's nil becomes an empty replacement).
#
# Returns the processed copy of raw. The copy is returned explicitly because
# gsub! itself returns nil when the text contains no [img] tag.
def process_and_upload_inline_images(raw)
  inline_image_regex = %r{\[img\]([\s\S]*?)\[/img\]}

  s = raw.dup

  s.gsub!(inline_image_regex) do
    # Read the capture before calling helpers that may run their own matches.
    data = Regexp.last_match(1)

    upload, filename = upload_inline_image data

    upload ? html_for_upload(upload, filename) : nil
  end

  s
end
2016-01-26 17:07:54 +01:00
# Converts a Discuz! post body (BBCode) into text suitable for Discourse.
#
# raw       - the original post/message text; a copy is processed.
# import_id - the Discuz! id of the post, used only in warning messages.
#
# In order, the method: strips the "reply to" header lines Discuz! prepends;
# rewrites [quote] blocks that reference an already imported post into
# Discourse [quote="user, post:N, topic:M"] form; removes the quote attribution
# line; turns remaining [quote] blocks into "> " prefixed lines; removes
# leading runs of four or more spaces; maps images hosted on the old site to
# [x-attach] markers and other [img] tags to <img>; converts [qq], [email],
# [s], [sup], [sub], [hr] and [code]; drops media/flash/audio/video, font, p,
# backcolor, size, color, hide, free, align and float tags; removes the
# "last edited by" notice; converts {:smiley:} to :smiley:; normalises list
# tags; rewrites internal links via replace_internal_link using pm_url_regexp
# and each pattern in @internal_url_regexps; strips the URL from @mentions;
# and finally prints a warning for any old-site URL still present.
# Returns the stripped result.
#
# NOTE(review): lines are missing from this listing (closing delimiters,
# else/begin/nil keywords) - compare with the repository copy before changing
# code here.
# NOTE(review): in the first [quote] gsub! the block parameter `matched` is the
# matched String, so matched[0] is its first character rather than the whole
# match - this looks unintended.
# NOTE(review): that pattern uses (.*)? which is greedy under /m, so it spans
# from the first [quote] to the last [/quote]; (.*?) was presumably meant.
def process_discuzx_post ( raw , import_id )
# raw = process_and_upload_inline_images(raw)
s = raw . dup
2015-01-29 15:06:30 +08:00
# Strip the quote
# [quote] quotation includes the topic which is the same as reply to in Discourse
# We get the pid to find the post number the post reply to. So it can be stripped
2023-01-07 11:53:14 +00:00
s =
s . gsub (
%r{ \ [b \ ]回复 \ [url=forum.php \ ?mod=redirect&goto=findpost&pid= \ d+&ptid= \ d+ \ ].* 的帖子 \ [/url \ ] \ [/b \ ] }i ,
" " ,
) . strip
s =
s . gsub (
%r{ \ [b \ ]回复 \ [url=https?:// #{ ORIGINAL_SITE_PREFIX } /redirect.php \ ?goto=findpost&pid= \ d+&ptid= \ d+ \ ].*? \ [/url \ ].*? \ [/b \ ] }i ,
" " ,
) . strip
s . gsub! ( %r{ \ [quote \ ](.*)? \ [/quote \ ] }im ) do | matched |
2016-01-26 17:07:54 +01:00
content = $1
post_import_id = find_post_id_by_quote_number ( content )
if post_import_id
post_id = post_id_from_imported_post_id ( post_import_id . to_i )
if ( post = Post . find_by ( id : post_id ) )
" [quote= \" #{ post . user . username } \" , post: #{ post . post_number } , topic: #{ post . topic_id } ] \n #{ content } \n [/quote] "
puts " post #{ import_id } quote to not exists post #{ post_import_id } , skip reply "
matched [ 0 ]
matched [ 0 ]
2015-02-10 21:43:16 -08:00
2023-01-07 11:53:14 +00:00
s . gsub! (
%r{ \ [size=2 \ ] \ [color= # 999999 \ ].*? 发表于 [ \ d \ - \ : ]* \ [/color \ ] \ [url=forum.php \ ?mod=redirect&goto=findpost&pid= \ d+&ptid= \ d+ \ ].*? \ [/url \ ] \ [/size \ ] }i ,
" " ,
s . gsub! (
%r{ \ [size=2 \ ] \ [color= # 999999 \ ].*? 发表于 [ \ d \ - \ : ]* \ [/color \ ] \ [url=https?:// #{ ORIGINAL_SITE_PREFIX } /redirect.php \ ?goto=findpost&pid= \ d+&ptid= \ d+ \ ].*? \ [/url \ ] \ [/size \ ] }i ,
" " ,
2016-01-26 17:07:54 +01:00
# convert quote
2023-01-07 11:53:14 +00:00
s . gsub! ( %r{ \ [quote \ ](.*?) \ [/quote \ ] }m ) { " \n " + ( $1 . strip ) . gsub ( / ^ / , " > " ) + " \n " }
2016-01-26 17:07:54 +01:00
# truncate line space, preventing line starting with many blanks to be parsed as code blocks
2023-01-07 11:53:14 +00:00
s . gsub! ( / ^ {4,} / , " " )
2016-01-26 17:07:54 +01:00
# TODO: Much better to use bbcode-to-md gem
# Convert image bbcode with width and height
2023-01-07 11:53:14 +00:00
s . gsub! (
%r{ \ [img[^ \ ]]* \ ]https?:// #{ ORIGINAL_SITE_PREFIX } /(.*) \ [/img \ ] }i ,
'[x-attach]\1[/x-attach]' ,
) # dont convert attachment
s . gsub! (
%r{ <img[^>]*src="https?:// #{ ORIGINAL_SITE_PREFIX } /(.*)".*?> }i ,
'[x-attach]\1[/x-attach]' ,
) # dont convert attachment
s . gsub! (
%r{ \ [img[^ \ ]]* \ ]https?://www \ .touhou \ .cc/blog/(.*) \ [/img \ ] }i ,
'[x-attach]../blog/\1[/x-attach]' ,
) # 私货
s . gsub! (
%r{ \ [img[^ \ ]]* \ ]https?://www \ .touhou \ .cc/ucenter/avatar.php \ ?uid=( \ d+)[^ \ ]]* \ [/img \ ] }i ,
) { " [x-attach] #{ discuzx_avatar_fullpath ( $1 , false ) [ 0 ] } [/x-attach] " } # 私货
s . gsub! ( %r{ \ [img=( \ d+),( \ d+) \ ]([^ \ ]]*) \ [/img \ ] }i , '<img width="\1" height="\2" src="\3">' )
s . gsub! ( %r{ \ [img \ ]([^ \ ]]*) \ [/img \ ] }i , '<img src="\1">' )
s . gsub! (
%r{ \ [qq \ ]([^ \ ]]*) \ [/qq \ ] }i ,
'<a href="http://wpa.qq.com/msgrd?V=3&Uin=\1&Site=[Discuz!]&from=discuz&Menu=yes" target="_blank"><!--<img src="static/image/common/qq_big.gif" border="0">-->QQ 交谈</a>' ,
s . gsub! ( %r{ \ [email \ ]([^ \ ]]*) \ [/email \ ] }i , '[url=mailto:\1]\1[/url]' ) # bbcode-to-md can convert it
s . gsub! ( %r{ \ [s \ ]([^ \ ]]*) \ [/s \ ] }i , '<s>\1</s>' )
s . gsub! ( %r{ \ [sup \ ]([^ \ ]]*) \ [/sup \ ] }i , '<sup>\1</sup>' )
s . gsub! ( %r{ \ [sub \ ]([^ \ ]]*) \ [/sub \ ] }i , '<sub>\1</sub>' )
2016-01-26 17:07:54 +01:00
s . gsub! ( / \ [hr \ ] /i , " \n --- \n " )
# remove the media tag
2023-01-07 11:53:14 +00:00
s . gsub! ( %r{ \ [/?media[^ \ ]]* \ ] }i , " \n " )
s . gsub! ( %r{ \ [/?flash[^ \ ]]* \ ] }i , " \n " )
s . gsub! ( %r{ \ [/?audio[^ \ ]]* \ ] }i , " \n " )
s . gsub! ( %r{ \ [/?video[^ \ ]]* \ ] }i , " \n " )
2015-01-29 15:06:30 +08:00
2015-03-24 17:59:08 +08:00
# Remove the font, p and backcolor tag
2015-01-29 15:06:30 +08:00
# Discourse doesn't support the font tag
2023-01-07 11:53:14 +00:00
s . gsub! ( / \ [font=[^ \ ]]*? \ ] /i , " " )
s . gsub! ( %r{ \ [/font \ ] }i , " " )
s . gsub! ( / \ [p=[^ \ ]]*? \ ] /i , " " )
s . gsub! ( %r{ \ [/p \ ] }i , " " )
s . gsub! ( / \ [backcolor=[^ \ ]]*? \ ] /i , " " )
s . gsub! ( %r{ \ [/backcolor \ ] }i , " " )
2015-01-29 15:06:30 +08:00
# Remove the size tag
# I really have no idea what is this
2023-01-07 11:53:14 +00:00
s . gsub! ( / \ [size=[^ \ ]]*? \ ] /i , " " )
s . gsub! ( %r{ \ [/size \ ] }i , " " )
2015-01-29 15:06:30 +08:00
# Remove the color tag
2023-01-07 11:53:14 +00:00
s . gsub! ( / \ [color=[^ \ ]]*? \ ] /i , " " )
s . gsub! ( %r{ \ [/color \ ] }i , " " )
2015-01-29 15:06:30 +08:00
2015-02-10 21:43:16 -08:00
# Remove the hide tag
2023-01-07 11:53:14 +00:00
s . gsub! ( %r{ \ [/?hide \ ] }i , " " )
s . gsub! ( %r{ \ [/?free[^ \ ]]* \ ] }i , " \n " )
2015-02-10 21:43:16 -08:00
2015-01-29 15:06:30 +08:00
# Remove the align tag
# still don't know what it is
2016-01-26 17:07:54 +01:00
s . gsub! ( / \ [align=[^ \ ]]*? \ ] /i , " \n " )
2023-01-07 11:53:14 +00:00
s . gsub! ( %r{ \ [/align \ ] }i , " \n " )
2016-01-26 17:07:54 +01:00
s . gsub! ( / \ [float=[^ \ ]]*? \ ] /i , " \n " )
2023-01-07 11:53:14 +00:00
s . gsub! ( %r{ \ [/float \ ] }i , " \n " )
2015-01-29 15:06:30 +08:00
# Convert code
2023-01-07 11:53:14 +00:00
s . gsub! ( %r{ \ [/?code \ ] }i , " \n ``` \n " )
2015-01-29 15:06:30 +08:00
# The edit notice should be removed
# example: 本帖最后由 Helloworld 于 2015-1-28 22:05 编辑
2023-01-07 11:53:14 +00:00
s . gsub! ( %r{ \ [i=s \ ] 本帖最后由[ \ s \ S]*?编辑 \ [/i \ ] } , " " )
2015-01-29 15:06:30 +08:00
# Convert the custom smileys to emojis
# `{:cry:}` to `:cry`
s . gsub! ( / \ {( \ : \ S*? \ :) \ } / , '\1' )
# Replace internal forum links that aren't in the <!-- l --> format
# convert list tags to ul and list=1 tags to ol
# (basically, we're only missing list=a here...)
2023-01-07 11:53:14 +00:00
s . gsub! ( %r{ \ [list \ ](.*?) \ [/list:u \ ] }m , '[ul]\1[/ul]' )
s . gsub! ( %r{ \ [list=1 \ ](.*?) \ [/list:o \ ] }m , '[ol]\1[/ol]' )
2015-01-29 15:06:30 +08:00
# convert *-tags to li-tags so bbcode-to-md can do its magic on phpBB's lists:
2023-01-07 11:53:14 +00:00
s . gsub! ( %r{ \ [ \ * \ ](.*?) \ [/ \ *:m \ ] } , '[li]\1[/li]' )
2015-01-29 15:06:30 +08:00
# Discuz can create PM out of a post, which will generates like
# [url=http://example.com/forum.php?mod=redirect&goto=findpost&pid=111&ptid=11][b]关于您在“主题名称”的帖子[/b][/url]
2023-01-07 11:53:14 +00:00
s . gsub! ( pm_url_regexp ) { | discuzx_link | replace_internal_link ( discuzx_link , $1 ) }
2015-01-29 15:06:30 +08:00
# [url][b]text[/b][/url] to **[url]text[/url]**
2023-01-07 11:53:14 +00:00
s . gsub! ( %r{ ( \ [url=[^ \ [ \ ]]*? \ ]) \ [b \ ]( \ S*) \ [/b \ ]( \ [/url \ ]) } , '**\1\2\3**' )
2015-01-29 15:06:30 +08:00
2016-01-26 17:07:54 +01:00
@internal_url_regexps . each do | internal_url_regexp |
s . gsub! ( internal_url_regexp ) do | discuzx_link |
2023-01-07 11:53:14 +00:00
replace_internal_link (
discuzx_link ,
$~ [ :tid ] . to_i
rescue StandardError
) ,
$~ [ :pid ] . to_i
rescue StandardError
) ,
$~ [ :fid ] . to_i
rescue StandardError
) ,
$~ [ :action ]
rescue StandardError
) ,
2016-01-26 17:07:54 +01:00
2015-01-29 15:06:30 +08:00
# @someone without the url
2023-01-07 11:53:14 +00:00
s . gsub! ( %r{ @ \ [url=[^ \ [ \ ]]*? \ ]( \ S*) \ [/url \ ] }i , '@\1' )
2015-01-29 15:06:30 +08:00
2023-01-07 11:53:14 +00:00
s . scan ( %r{ http(?:s)?:// #{ ORIGINAL_SITE_PREFIX . gsub ( " . " , '\.' ) } /[^ \ [ \ ] \ s]* } ) do | link |
puts " WARNING: post #{ import_id } can't replace internal url #{ link } "
2016-01-26 17:07:54 +01:00
2015-01-29 15:06:30 +08:00
s . strip
2023-01-07 11:53:14 +00:00
# Maps a link pointing into the old Discuz! site onto its Discourse equivalent.
#
# Resolution is attempted in this order: imported post, imported topic (via
# the first post of the thread), imported category, then a few known page
# actions. When nothing resolves, the original link is returned unchanged.
#
# Only the first two arguments are required, so a caller that only knows the
# thread id can pass just the link and that id.
#
# @param discuzx_link [String] the original URL; also the fallback result
# @param import_topic_id [Integer, String, nil] Discuz! thread id (tid)
# @param import_post_id [Integer, nil] Discuz! post id (pid)
# @param import_category_id [Integer, nil] Discuz! forum id (fid)
# @param action [String, nil] page action such as "index" or "stats"
# @return [String] the rewritten URL, or +discuzx_link+ when unresolved
def replace_internal_link(
  discuzx_link,
  import_topic_id,
  import_post_id = nil,
  import_category_id = nil,
  action = nil
)
  if import_post_id
    post_id = post_id_from_imported_post_id(import_post_id)
    # find_by yields nil for a missing row instead of raising, which is what
    # the `if post` guard relies on.
    post = post_id && Post.find_by(id: post_id)
    return post.full_url if post
  end

  if import_topic_id
    # The topic is located through the thread's first post.
    results =
      mysql_query(
        "SELECT pid
           FROM #{table_name "forum_post"}
          WHERE tid = #{import_topic_id.to_i} AND first
          LIMIT 1",
      )

    first_post = results.first
    return discuzx_link unless first_post

    lookup = topic_lookup_from_imported_post_id(first_post["pid"])
    return lookup ? "#{NEW_SITE_PREFIX}#{lookup[:url]}" : discuzx_link
  end

  if import_category_id
    category_id = category_id_from_imported_category_id(import_category_id)
    category = category_id && Category.find_by(id: category_id)
    return category.url if category
  end

  case action
  when "index"
    "#{NEW_SITE_PREFIX}/"
  when "stat", "stats", "ranklist"
    "#{NEW_SITE_PREFIX}/users"
  else
    discuzx_link
  end
end
2015-01-29 15:06:30 +08:00
# Memoized pattern for the "find post" redirect URL on the old site, e.g.
# http://example.com/forum.php?mod=redirect&goto=findpost&pid=111&ptid=11
#
# The site prefix is passed through Regexp.escape so that every regex
# metacharacter in it (not only dots) is matched literally.
#
# @return [Regexp] capture group 1 holds the thread id (ptid)
def pm_url_regexp
  @pm_url_regexp ||=
    Regexp.new(
      "http(?:s)?://#{Regexp.escape(ORIGINAL_SITE_PREFIX)}/forum\\.php\\?mod=redirect&goto=findpost&pid=\\d+&ptid=(\\d+)",
    )
end
2015-01-29 15:06:30 +08:00
# This step is done separately because it can take multiple attempts to get right (because of
# missing files, wrong paths, authorized extensions, etc.).
#
# For every post whose import id is a plain integer string, this:
#   1. replaces inline `[attach]id[/attach]` markers with upload markup,
#   2. replaces `[x-attach]path[/x-attach]` markers with upload markup,
#   3. appends markup for the post's remaining attachment rows,
#   4. saves a revision when the raw text changed.
# A summary is printed once all posts have been visited.
def import_attachments
  setting = AUTHORIZED_EXTENSIONS.join("|")
  SiteSetting.authorized_extensions = setting if setting != SiteSetting.authorized_extensions

  attachment_regex = %r{\[attach\](\d+)\[/attach\]}
  attachment_link_regex = %r{\[x-attach\](.+)\[/x-attach\]}

  current_count = 0
  total_count =
    mysql_query("SELECT count(*) count FROM #{table_name "forum_post"};").first["count"]
  success_count = 0
  fail_count = 0

  puts "", "Importing attachments...", ""

  Post.find_each do |post|
    import_id = post.custom_fields["import_id"]
    # Skip posts whose import id does not round-trip through Integer.
    next unless import_id == import_id.to_i.to_s

    user = post.user

    current_count += 1
    print_status current_count, total_count

    new_raw = post.raw.dup
    inline_attachments = []

    new_raw.gsub!(attachment_regex) do
      attachment_id = Regexp.last_match(1).to_i
      inline_attachments.push attachment_id

      upload, filename = find_upload(user, post, attachment_id)
      unless upload
        fail_count += 1
        # A nil block result makes gsub! replace the marker with "".
        next
      end

      html_for_upload(upload, filename)
    end

    new_raw.gsub!(attachment_link_regex) do
      attachment_file = Regexp.last_match(1)
      filename = File.basename(attachment_file)

      upload = create_upload(user.id, File.join(DISCUZX_BASE_DIR, attachment_file), filename)
      unless upload
        fail_count += 1
        next
      end

      html_for_upload(upload, filename)
    end

    # Attachments of this post that were not referenced inline.
    sql =
      "SELECT aid
         FROM #{table_name "forum_attachment"}
        WHERE pid = #{import_id}"
    sql = "#{sql} AND aid NOT IN (#{inline_attachments.join(",")})" unless inline_attachments.empty?

    mysql_query(sql).each do |attachment|
      upload, filename = find_upload(user, post, attachment["aid"])
      unless upload
        fail_count += 1
        next
      end

      html = html_for_upload(upload, filename)
      # Avoid appending the same markup twice.
      next if new_raw.include?(html)

      new_raw << "\n"
      new_raw << html
    end

    if new_raw != post.raw
      PostRevisor.new(post).revise!(
        post.user,
        { raw: new_raw },
        bypass_bump: true,
        edit_reason: "从 Discuz 中导入附件",
      )
    end

    success_count += 1
  end

  puts "", ""
  puts "succeeded: #{success_count}"
  puts "failed: #{fail_count}" if fail_count > 0
  puts ""
end
2015-01-29 15:06:30 +08:00
# Create the path to the Discuz! "big" avatar file for a user id.
#
# The id is left-padded with zeros to nine characters; characters 0-2, 3-4
# and 5-6 name three nested directories and the last two characters prefix
# the file name.
#
# @param user_id [Integer, String] Discuz! user id
# @param absolute [Boolean] when true the path is rooted at DISCUZX_BASE_DIR,
#   otherwise it starts at AVATAR_DIR
# @return [Array(String, String)] the path and the bare file name
def discuzx_avatar_fullpath(user_id, absolute = true)
  padded_id = user_id.to_s.rjust(9, "0")
  file_name = "#{padded_id[-2..-1]}_avatar_big.jpg"

  segments = [AVATAR_DIR, padded_id[0..2], padded_id[3..4], padded_id[5..6], file_name]
  segments.unshift(DISCUZX_BASE_DIR) if absolute

  [File.join(*segments), file_name]
end
2016-01-26 17:07:54 +01:00
2015-01-29 15:06:30 +08:00
# Extracts the Discuz! post id (pid) that a piece of raw text refers to.
# The post id is in the quote block.
#
# Three shapes are recognised, tried in order: the standard relative
# "findpost" URL, the older absolute redirect.php URL on the original site,
# and any `pid=<digits>` inside a [quote] block.
#
# @param raw [String, nil] raw post text
# @return [String, nil] the pid digits, or nil when nothing matches
def find_post_id_by_quote_number(raw)
  # Escape the prefix so its dots only match literal dots.
  site = Regexp.escape(ORIGINAL_SITE_PREFIX)

  case raw
  when /\[url=forum\.php\?mod=redirect&goto=findpost&pid=(\d+)&ptid=\d+\]/ #standard
    Regexp.last_match(1)
  when %r{\[url=https?://#{site}/redirect\.php\?goto=findpost&pid=(\d+)&ptid=\d+\]} # old discuz 7 format
    Regexp.last_match(1)
  when %r{\[quote\][\S\s]*pid=(\d+)[\S\s]*\[/quote\]} # quote
    Regexp.last_match(1)
  end
end
2016-01-26 17:07:54 +01:00
2015-01-29 15:06:30 +08:00
# For some reason, Discuz! inlined some PNG files into posts as base64 data
# (the original note adds that the corresponding stored image is broken in
# some way). This decodes such a payload and stores it as an upload owned by
# the system user.
#
# @param data [String, nil] text starting with "data:image/png;base64,"; the
#   prefix length is stripped without checking the prefix itself
# @return [Array, nil] `[upload, real_filename]`, or nil when there is no
#   data, no payload after the prefix, or the upload is missing/invalid
def upload_inline_image(data)
  return unless data

  puts "Creating inline image"

  encoded_photo = data["data:image/png;base64,".length..-1]
  unless encoded_photo
    puts "Error parsed inline photo", data[0..20]
    return
  end
  raw_file = Base64.decode64(encoded_photo)

  real_filename = "#{SecureRandom.hex}.png"
  tempfile = Tempfile.new(%w[inline .png])
  begin
    tempfile.binmode
    tempfile.write(raw_file)
    tempfile.rewind

    upload = create_upload(Discourse::SYSTEM_USER_ID, tempfile, real_filename)
  ensure
    # Cleanup is best-effort: a failure to close must not prevent the unlink,
    # and neither may mask the result of the upload.
    begin
      tempfile.close
    rescue StandardError
      nil
    end
    begin
      tempfile.unlink
    rescue StandardError
      nil
    end
  end

  if upload.nil? || !upload.valid?
    puts "Upload not valid :("
    puts upload.errors.inspect if upload
    return nil
  end

  [upload, real_filename]
end
2015-01-29 15:06:30 +08:00
# Find the uploaded file and its real name from the db, then create the
# corresponding upload.
#
# Two queries are issued: the first reads `tableid` for the attachment from
# the forum_attachment table, the second reads the attachment's row from the
# table named `<forum_attachment>_<tableid>`.
#
# @param user [#id] owner passed on to create_upload
# @param post [#id, #custom_fields] post whose "import_id" custom field is the
#   Discuz! pid
# @param upload_id [Integer] Discuz! attachment id (aid)
# @return [Array, nil] `[upload, real_filename]`; nil when a record or the
#   file is missing, the upload is missing/invalid, or a Mysql2::Error occurs
def find_upload(user, post, upload_id)
  attachment_table = table_name "forum_attachment"
  import_id = post.custom_fields["import_id"]

  # search for table id
  sql =
    "SELECT a.pid post_id,
            a.aid upload_id,
            a.tableid table_id
       FROM #{attachment_table} a
      WHERE a.pid = #{import_id}
        AND a.aid = #{upload_id};"
  meta_data = mysql_query(sql).first
  unless meta_data
    puts "Couldn't find forum_attachment record meta data for post.id = #{post.id}, import_id = #{import_id}"
    return nil
  end

  # search for uploaded file meta data
  sql =
    "SELECT a.pid post_id,
            a.aid upload_id,
            a.tid topic_id,
            a.uid user_id,
            a.dateline uploaded_time,
            a.filename real_filename,
            a.attachment attachment_path,
            a.remote is_remote,
            a.description description,
            a.isimage is_image,
            a.thumb is_thumb
       FROM #{attachment_table}_#{meta_data["table_id"]} a
      WHERE a.aid = #{upload_id};"
  row = mysql_query(sql).first
  unless row
    puts "Couldn't find attachment record for post.id = #{post.id}, import_id = #{import_id}"
    return nil
  end

  filename = File.join(DISCUZX_BASE_DIR, ATTACHMENT_DIR, row["attachment_path"])
  unless File.exist?(filename)
    puts "Attachment file doesn't exist: #{filename}"
    return nil
  end

  real_filename = row["real_filename"]
  # A name beginning with "." gets a random hex prefix.
  real_filename.prepend SecureRandom.hex if real_filename.start_with?(".")

  upload = create_upload(user.id, filename, real_filename)
  if upload.nil? || !upload.valid?
    puts "Upload not valid :("
    puts upload.errors.inspect if upload
    return nil
  end

  [upload, real_filename]
rescue Mysql2::Error => e
  puts "SQL Error"
  puts e.message
  puts sql
  nil
end
2019-11-14 15:10:51 -05:00
2015-01-29 15:06:30 +08:00
2016-01-26 17:07:54 +01:00
# Returns the first argument that is not blank, or "" when every argument is
# blank (or none is given).
#
# @param items [Array<Object>] candidates, checked in order
# @return [Object, String] the first present item, else the empty string
def first_exists(*items)
  items.find(&:present?) || ""
end
2016-01-26 17:07:54 +01:00
2015-01-29 15:06:30 +08:00
# Runs a SQL statement on the Discuz! database connection held in @client.
# The `cache_rows: false` option is forwarded to the client on every call.
#
# @param sql [String] the statement to execute
# @return [Object] whatever the client's `query` returns
def mysql_query(sql)
  @client.query(sql, cache_rows: false)
end
# Instantiate the importer and call perform on it as soon as this file is evaluated.
ImportScripts::DiscuzX.new.perform