2018-05-28 17:02:19 +08:00
require " mysql2 "
require " htmlentities "
require File . expand_path ( File . dirname ( __FILE__ ) + " /base.rb " )
class ImportScripts :: Smf1 < ImportScripts :: Base
BATCH_SIZE || = 5000
2018-05-30 21:53:57 +08:00
UPLOADS_DIR || = ENV [ " UPLOADS_DIR " ] . presence
FORUM_URL || = ENV [ " FORUM_URL " ] . presence
2018-05-28 17:02:19 +08:00
def initialize
2018-05-30 21:53:57 +08:00
fail " UPLOADS_DIR env variable is required (example: '/path/to/attachments') " unless UPLOADS_DIR
fail " FORUM_URL env variable is required (example: 'https://domain.com/forum') " unless FORUM_URL
@client = Mysql2 :: Client . new (
host : ENV [ " DB_HOST " ] || " localhost " ,
username : ENV [ " DB_USER " ] || " root " ,
password : ENV [ " DB_PW " ] ,
database : ENV [ " DB_NAME " ] ,
)
check_version!
2018-05-28 17:02:19 +08:00
super
@htmlentities = HTMLEntities . new
puts " Loading existing usernames... "
@old_to_new_usernames = UserCustomField . joins ( :user ) . where ( name : " import_username " ) . pluck ( " value " , " users.username " ) . to_h
puts " Loading pm mapping... "
@pm_mapping = { }
Topic
. joins ( :topic_allowed_users )
. where ( archetype : Archetype . private_message )
. where ( " title NOT ILIKE 'Re: %' " )
. group ( :id )
. order ( :id )
. pluck ( " string_agg(topic_allowed_users.user_id::text, ',' ORDER BY topic_allowed_users.user_id), title, topics.id " )
. each do | users , title , topic_id |
@pm_mapping [ users ] || = { }
@pm_mapping [ users ] [ title ] || = [ ]
@pm_mapping [ users ] [ title ] << topic_id
end
end
def execute
SiteSetting . permalink_normalizations = " /(.+) \\ ?.*/ \\ 1 "
import_groups
import_users
import_categories
import_posts
import_personal_posts
import_attachments
import_likes
import_feedbacks
import_banned_domains
import_banned_emails
import_banned_ips
end
def check_version!
version = mysql_query ( " SELECT value FROM smf_settings WHERE variable = 'smfVersion' LIMIT 1 " ) . first [ " value " ]
fail " Incompatible version ( #{ version } ) " unless version & . start_with? ( " 1. " )
end
def import_groups
puts " " , " Importing groups... "
# skip administrators/moderators
groups = mysql_query ( " SELECT id_group, groupName FROM smf_membergroups WHERE id_group > 3 " ) . to_a
create_groups ( groups ) do | g |
next if g [ " groupName " ] . blank?
{
id : g [ " id_group " ] ,
full_name : g [ " groupName " ] ,
}
end
end
def import_users
puts " " , " Importing users... "
last_user_id = - 1
total = mysql_query ( " SELECT COUNT(*) count FROM smf_members " ) . first [ " count " ]
batches ( BATCH_SIZE ) do | offset |
users = mysql_query ( << ~ SQL
SELECT m . id_member
, memberName
, dateRegistered
, id_group
, lastLogin
, realName
, emailAddress
, personalText
, CASE WHEN birthdate > '1900-01-01' THEN birthdate ELSE NULL END birthdate
, websiteUrl
, location
, usertitle
, memberIP
, memberIP2
, is_activated
, additionalGroups
2018-07-12 22:55:30 +08:00
, avatar
2018-05-28 17:02:19 +08:00
, id_attach
, attachmentType
, filename
FROM smf_members m
LEFT JOIN smf_attachments a ON a . id_member = m . id_member
WHERE m . id_member > #{last_user_id}
ORDER BY m . id_member
LIMIT #{BATCH_SIZE}
SQL
) . to_a
break if users . empty?
last_user_id = users [ - 1 ] [ " id_member " ]
user_ids = users . map { | u | u [ " id_member " ] }
next if all_records_exist? ( :users , user_ids )
create_users ( users , total : total , offset : offset ) do | u |
created_at = Time . zone . at ( u [ " dateRegistered " ] )
group_ids = [ u [ " id_group " ] , * u [ " additionalGroups " ] . split ( " , " ) . map ( & :to_i ) ] . uniq
{
id : u [ " id_member " ] ,
username : u [ " memberName " ] ,
created_at : created_at ,
first_seen_at : created_at ,
primary_group_id : group_id_from_imported_group_id ( u [ " id_group " ] ) ,
admin : group_ids . include? ( 1 ) ,
moderator : group_ids . include? ( 2 ) || group_ids . include? ( 3 ) ,
last_seen_at : Time . zone . at ( u [ " lastLogin " ] ) ,
name : u [ " realName " ] . presence ,
email : u [ " emailAddress " ] ,
bio_raw : pre_process_raw ( u [ " personalText " ] . presence ) ,
date_of_birth : u [ " birthdate " ] ,
website : u [ " website " ] . presence ,
location : u [ " location " ] . presence ,
title : u [ " usertitle " ] . presence ,
registration_ip_address : u [ " memberIP " ] ,
ip_address : u [ " memberIP2 " ] ,
active : u [ " is_activated " ] == 1 ,
approved : u [ " is_activated " ] == 1 ,
post_create_action : proc do | user |
# usernames
@old_to_new_usernames [ u [ " memberName " ] ] = user . username
# groups
GroupUser . transaction do
group_ids . each do | gid |
( group_id = group_id_from_imported_group_id ( gid ) ) && GroupUser . find_or_create_by ( user : user , group_id : group_id )
end
end
# avatar
2018-07-12 23:38:07 +08:00
avatar_url = nil
if u [ " avatar " ] . present?
if u [ " avatar " ] . start_with? ( " http " )
avatar_url = u [ " avatar " ]
elsif u [ " avatar " ] . start_with? ( " avatar_ " )
avatar_url = " #{ FORUM_URL } /avatar-members/ #{ u [ " avatar " ] } "
end
end
avatar_url || = if u [ " attachmentType " ] == 0 && u [ " id_attach " ] . present?
2018-05-30 21:53:57 +08:00
" #{ FORUM_URL } /index.php?action=dlattach;attach= #{ u [ " id_attach " ] } ;type=avatar "
2018-05-28 17:02:19 +08:00
elsif u [ " attachmentType " ] == 1 && u [ " filename " ] . present?
2018-05-30 21:53:57 +08:00
" #{ FORUM_URL } /avatar-members/ #{ u [ " filename " ] } "
2018-05-28 17:02:19 +08:00
end
if avatar_url . present?
UserAvatar . import_url_for_user ( avatar_url , user ) rescue nil
end
end
}
end
end
end
def import_categories
puts " " , " Importing categories... "
categories = mysql_query ( << ~ SQL
SELECT id_board
, id_parent
, boardOrder
, name
, description
FROM smf_boards
ORDER BY id_parent , id_board
SQL
) . to_a
parent_categories = categories . select { | c | c [ " id_parent " ] == 0 }
children_categories = categories . select { | c | c [ " id_parent " ] != 0 }
create_categories ( parent_categories ) do | c |
{
id : c [ " id_board " ] ,
name : c [ " name " ] ,
description : pre_process_raw ( c [ " description " ] . presence ) ,
position : c [ " boardOrder " ] ,
post_create_action : proc do | category |
Permalink . find_or_create_by ( url : " forums/index.php/board, #{ c [ " id_board " ] } .0.html " , category_id : category . id )
end ,
}
end
create_categories ( children_categories ) do | c |
{
id : c [ " id_board " ] ,
parent_category_id : category_id_from_imported_category_id ( c [ " id_parent " ] ) ,
name : c [ " name " ] ,
description : pre_process_raw ( c [ " description " ] . presence ) ,
position : c [ " boardOrder " ] ,
post_create_action : proc do | category |
Permalink . find_or_create_by ( url : " forums/index.php/board, #{ c [ " id_board " ] } .0.html " , category_id : category . id )
end ,
}
end
end
def import_posts
puts " " , " Importing posts... "
last_post_id = - 1
total = mysql_query ( " SELECT COUNT(*) count FROM smf_messages " ) . first [ " count " ]
batches ( BATCH_SIZE ) do | offset |
posts = mysql_query ( << ~ SQL
SELECT m . id_msg
, m . id_topic
, m . id_board
, m . posterTime
, m . id_member
, m . subject
, m . body
, t . isSticky
, t . id_first_msg
, t . numViews
FROM smf_messages m
JOIN smf_topics t ON t . id_topic = m . id_topic
WHERE m . id_msg > #{last_post_id}
ORDER BY m . id_msg
LIMIT #{BATCH_SIZE}
SQL
) . to_a
break if posts . empty?
last_post_id = posts [ - 1 ] [ " id_msg " ]
post_ids = posts . map { | p | p [ " id_msg " ] }
next if all_records_exist? ( :post , post_ids )
create_posts ( posts , total : total , offset : offset ) do | p |
created_at = Time . at ( p [ " posterTime " ] )
post = {
id : p [ " id_msg " ] ,
created_at : created_at ,
user_id : user_id_from_imported_user_id ( p [ " id_member " ] ) || - 1 ,
raw : pre_process_raw ( p [ " body " ] ) ,
}
if p [ " id_msg " ] == p [ " id_first_msg " ]
post [ :category ] = category_id_from_imported_category_id ( p [ " id_board " ] )
post [ :title ] = @htmlentities . decode ( p [ " subject " ] )
post [ :views ] = p [ " numViews " ]
post [ :pinned_at ] = created_at if p [ " isSticky " ] == 1
post [ :post_create_action ] = proc do | pp |
Permalink . find_or_create_by ( url : " forums/index.php/topic, #{ p [ " id_topic " ] } .0.html " , topic_id : pp . topic_id )
end
elsif parent = topic_lookup_from_imported_post_id ( p [ " id_first_msg " ] )
post [ :topic_id ] = parent [ :topic_id ]
post [ :post_create_action ] = proc do | pp |
Permalink . find_or_create_by ( url : " forums/index.php/topic, #{ p [ " id_topic " ] } .msg #{ p [ " id_msg " ] } .html " , post_id : pp . id )
end
else
next
end
post
end
end
end
def import_personal_posts
puts " " , " Importing personal posts... "
last_post_id = - 1
total = mysql_query ( " SELECT COUNT(*) count FROM smf_personal_messages WHERE deletedBySender = 0 " ) . first [ " count " ]
batches ( BATCH_SIZE ) do | offset |
posts = mysql_query ( << ~ SQL
SELECT id_pm
, id_member_from
, msgtime
, subject
, body
, ( SELECT GROUP_CONCAT ( id_member ) FROM smf_pm_recipients r WHERE r . id_pm = pm . id_pm ) recipients
FROM smf_personal_messages pm
WHERE deletedBySender = 0
AND id_pm > #{last_post_id}
ORDER BY id_pm
LIMIT #{BATCH_SIZE}
SQL
) . to_a
break if posts . empty?
last_post_id = posts [ - 1 ] [ " id_pm " ]
post_ids = posts . map { | p | " pm- #{ p [ " id_pm " ] } " }
next if all_records_exist? ( :post , post_ids )
create_posts ( posts , total : total , offset : offset ) do | p |
next unless user_id = user_id_from_imported_user_id ( p [ " id_member_from " ] )
next if p [ " recipients " ] . blank?
recipients = p [ " recipients " ] . split ( " , " ) . map { | id | user_id_from_imported_user_id ( id ) } . compact . uniq
next if recipients . empty?
id = " pm- #{ p [ " id_pm " ] } "
next if post_id_from_imported_post_id ( id )
post = {
id : id ,
created_at : Time . at ( p [ " msgtime " ] ) ,
user_id : user_id ,
raw : pre_process_raw ( p [ " body " ] ) ,
}
users = ( recipients + [ user_id ] ) . sort . uniq . join ( " , " )
title = @htmlentities . decode ( p [ " subject " ] )
if topic_id = find_pm_topic_id ( users , title )
post [ :topic_id ] = topic_id
else
post [ :archetype ] = Archetype . private_message
post [ :title ] = title
post [ :target_usernames ] = User . where ( id : recipients ) . pluck ( :username )
post [ :post_create_action ] = proc do | p |
@pm_mapping [ users ] || = { }
@pm_mapping [ users ] [ title ] || = [ ]
@pm_mapping [ users ] [ title ] << p . topic_id
end
end
post
end
end
end
def find_pm_topic_id ( users , title )
return unless title . start_with? ( " Re: " )
return unless @pm_mapping [ users ]
title = title . gsub ( / ^(Re: )+ /i , " " )
return unless @pm_mapping [ users ] [ title ]
@pm_mapping [ users ] [ title ] [ - 1 ]
end
def import_attachments
puts " " , " Importing attachments... "
count = 0
last_upload_id = - 1
total = mysql_query ( " SELECT COUNT(*) count FROM smf_attachments WHERE id_msg IS NOT NULL " ) . first [ " count " ]
batches ( BATCH_SIZE ) do | offset |
uploads = mysql_query ( << ~ SQL
SELECT id_attach
, id_msg
, filename
, file_hash
FROM smf_attachments
WHERE id_msg IS NOT NULL
AND id_attach > #{last_upload_id}
ORDER BY id_attach
LIMIT #{BATCH_SIZE}
SQL
) . to_a
break if uploads . empty?
last_upload_id = uploads [ - 1 ] [ " id_attach " ]
uploads . each do | u |
count += 1
next unless post = PostCustomField . joins ( :post ) . find_by ( name : " import_id " , value : u [ " id_msg " ] . to_s ) & . post
path = File . join ( UPLOADS_DIR , " #{ u [ " id_attach " ] } _ #{ u [ " file_hash " ] } " )
next unless File . exists? ( path ) && File . size ( path ) > 0
if upload = create_upload ( post . user_id , path , u [ " filename " ] )
html = html_for_upload ( upload , u [ " filename " ] )
2018-07-12 22:55:30 +08:00
unless post . raw [ html ] || PostUpload . where ( upload : upload , post : post ) . exists?
2018-05-28 17:02:19 +08:00
post . raw += " \n \n #{ html } \n \n "
post . save
PostUpload . create ( upload : upload , post : post )
end
end
print_status ( count , total , get_start_time ( " attachments " ) )
end
end
end
def import_likes
return if mysql_query ( " SHOW TABLES LIKE 'smf_thank_you_post' " ) . first . nil?
puts " " , " Importing likes... "
count = 0
total = mysql_query ( " SELECT COUNT(*) count FROM smf_thank_you_post WHERE thx_time > 0 " ) . first [ " count " ]
like = PostActionType . types [ :like ]
mysql_query ( " SELECT id_msg, id_member, thx_time FROM smf_thank_you_post WHERE thx_time > 0 ORDER BY id_thx_post " ) . each do | l |
print_status ( count += 1 , total , get_start_time ( " likes " ) )
next unless post_id = post_id_from_imported_post_id ( l [ " id_msg " ] )
next unless user_id = user_id_from_imported_user_id ( l [ " id_member " ] )
next if PostAction . where ( post_action_type_id : like , post_id : post_id , user_id : user_id ) . exists?
PostAction . create ( post_action_type_id : like , post_id : post_id , user_id : user_id , created_at : Time . at ( l [ " thx_time " ] ) )
end
end
FEEDBACKS || = - " feedbacks "
def import_feedbacks
return if mysql_query ( " SHOW TABLES LIKE 'smf_feedback' " ) . first . nil?
puts " " , " Importing feedbacks... "
User . register_custom_field_type ( FEEDBACKS , :json )
count = 0
total = mysql_query ( " SELECT COUNT(*) count FROM smf_feedback WHERE approved " ) . first [ " count " ]
mysql_query ( << ~ SQL
SELECT feedbackid
, id_member
, feedbackmember_id
, saledate
, saletype
, salevalue
, comment_short
, comment_long
FROM smf_feedback
WHERE approved
ORDER BY feedbackid
SQL
) . each do | f |
print_status ( count += 1 , total , get_start_time ( " feedbacks " ) )
2018-06-28 15:00:28 +08:00
next unless user_id_from = user_id_from_imported_user_id ( f [ " feedbackmember_id " ] )
next unless user_id_to = user_id_from_imported_user_id ( f [ " id_member " ] )
2018-05-28 17:02:19 +08:00
next unless user = User . find_by ( id : user_id_to )
feedbacks = user . custom_fields [ FEEDBACKS ] || [ ]
next if feedbacks . find { | ff | ff [ " id " ] == f [ " feedbackid " ] }
feedbacks << {
id : f [ " feedbackid " ] ,
created_at : Time . at ( f [ " saledate " ] ) ,
from : user_id_from ,
type : f [ " saletype " ] ,
value : f [ " salevalue " ] ,
comment_short : @htmlentities . decode ( f [ " comment_short " ] ) . strip . presence ,
comment_long : @htmlentities . decode ( f [ " comment_long " ] ) . strip . presence ,
}
user . custom_fields [ FEEDBACKS ] = feedbacks . to_json
user . save_custom_fields
end
end
def import_banned_domains
puts " " , " Importing banned email domains... "
blacklist = SiteSetting . email_domains_blacklist . split ( " | " )
banned_domains = mysql_query ( " SELECT SUBSTRING(email_address, 3) domain FROM smf_ban_items WHERE email_address RLIKE '^%@[^%]+$' GROUP BY email_address " ) . map { | r | r [ " domain " ] }
SiteSetting . email_domains_blacklist = ( blacklist + banned_domains ) . uniq . sort . join ( " | " )
end
def import_banned_emails
puts " " , " Importing banned emails... "
count = 0
banned_emails = mysql_query ( " SELECT email_address FROM smf_ban_items WHERE email_address RLIKE '^[^%]+@[^%]+$' GROUP BY email_address " ) . map { | r | r [ " email_address " ] }
banned_emails . each do | email |
print_status ( count += 1 , banned_emails . size , get_start_time ( " banned_emails " ) )
ScreenedEmail . find_or_create_by ( email : email )
end
end
def import_banned_ips
puts " " , " Importing banned IPs... "
count = 0
banned_ips = mysql_query ( << ~ SQL
SELECT CONCAT_WS ( '.' , ip_low1 , ip_low2 , ip_low3 , ip_low4 ) low
, CONCAT_WS ( '.' , ip_high1 , ip_high2 , ip_high3 , ip_high4 ) high
, hits
FROM smf_ban_items
WHERE ( ip_low1 + ip_low2 + ip_low3 + ip_low4 + ip_high1 + ip_high2 + ip_high3 + ip_high4 ) > 0
GROUP BY low , high , hits ;
SQL
) . to_a
banned_ips . each do | r |
print_status ( count += 1 , banned_ips . size , get_start_time ( " banned_ips " ) )
if r [ " low " ] == r [ " high " ]
if ! ScreenedIpAddress . where ( " ? <<= ip_address " , r [ " low " ] ) . exists?
ScreenedIpAddress . create ( ip_address : r [ " low " ] , match_count : r [ " hits " ] )
end
else
low_values = r [ " low " ] . split ( " . " ) . map ( & :to_i )
high_values = r [ " high " ] . split ( " . " ) . map ( & :to_i )
first_diff = low_values . zip ( high_values ) . count { | a , b | a == b }
first_diff -= 1 if low_values [ first_diff ] == 0 && high_values [ first_diff ] == 255
prefix = low_values [ 0 ... first_diff ]
suffix = [ 0 ] * ( 3 - first_diff )
mask = 8 * ( first_diff + 1 )
values = ( low_values [ first_diff ] .. high_values [ first_diff ] )
hits = ( r [ " hits " ] / [ 1 , values . count ] . max ) . floor
values . each do | v |
range_values = prefix + [ v ] + suffix
ip_address = " #{ range_values . join ( " . " ) } / #{ mask } "
if ! ScreenedIpAddress . where ( " ? <<= ip_address " , ip_address ) . exists?
ScreenedIpAddress . create ( ip_address : ip_address , match_count : hits )
end
end
end
end
ScreenedIpAddress . where ( last_match_at : nil ) . update_all ( last_match_at : Time . new ( 2000 , 01 , 01 ) )
puts " " , " Rolling up... "
ScreenedIpAddress . roll_up
end
IGNORED_BBCODE || = %w{
2018-07-19 16:29:54 +08:00
black blue center color email flash font glow green iurl left list move red
right shadown size table time white
2018-05-28 17:02:19 +08:00
}
def pre_process_raw ( raw )
return " " if raw . blank?
raw = @htmlentities . decode ( raw )
# [acronym]
2018-07-19 16:29:54 +08:00
raw . gsub! ( / \ [acronym=([^ \ ]]+) \ ](.*?) \ [ \/ acronym \ ] /im ) { %{ <abbr title=" #{ $1 } "> #{ $2 } </abbr> } }
2018-05-28 17:02:19 +08:00
# [br]
raw . gsub! ( / \ [br \ ] /i , " \n " )
raw . gsub! ( / <br \ s* \/ ?> /i , " \n " )
# [hr]
raw . gsub! ( / \ [hr \ ] /i , " <hr/> " )
# [sub]
2018-07-19 16:29:54 +08:00
raw . gsub! ( / \ [sub \ ](.*?) \ [ \/ sub \ ] /im ) { " <sub> #{ $1 } </sub> " }
2018-05-28 17:02:19 +08:00
# [sup]
2018-07-19 16:29:54 +08:00
raw . gsub! ( / \ [sup \ ](.*?) \ [ \/ sup \ ] /im ) { " <sup> #{ $1 } </sup> " }
2018-05-28 17:02:19 +08:00
# [html]
raw . gsub! ( / \ [html \ ] /i , " \n ```html \n " )
raw . gsub! ( / \ [ \/ html \ ] /i , " \n ``` \n " )
# [php]
raw . gsub! ( / \ [php \ ] /i , " \n ```php \n " )
raw . gsub! ( / \ [ \/ php \ ] /i , " \n ``` \n " )
# [code]
raw . gsub! ( / \ [ \/ ?code \ ] /i , " \n ``` \n " )
# [pre]
raw . gsub! ( / \ [ \/ ?pre \ ] /i , " \n ``` \n " )
# [tt]
raw . gsub! ( / \ [ \/ ?tt \ ] /i , " ` " )
# [ftp]
raw . gsub! ( / \ [ftp /i , " [url " )
raw . gsub! ( / \ [ \/ ftp \ ] /i , " [/url] " )
# [me]
2018-07-19 16:29:54 +08:00
raw . gsub! ( / \ [me=([^ \ ]]*) \ ](.*?) \ [ \/ me \ ] /im ) { " _ \\ * #{ $1 } #{ $2 } _ " }
2018-05-28 17:02:19 +08:00
# [li]
2018-07-19 16:29:54 +08:00
raw . gsub! ( / \ [li \ ](.*?) \ [ \/ li \ ] /im ) { " - #{ $1 } " }
# puts [img] on their own line
2018-07-19 19:11:01 +08:00
raw . gsub! ( / \ [img[^ \ ]]* \ ](.*?) \ [ \/ img \ ] /im ) { " \n #{ $1 } \n " }
2018-05-28 17:02:19 +08:00
# puts [youtube] on their own line
2018-07-19 16:29:54 +08:00
raw . gsub! ( / \ [youtube \ ](.*?) \ [ \/ youtube \ ] /im ) { " \n #{ $1 } \n " }
2018-05-28 17:02:19 +08:00
IGNORED_BBCODE . each { | code | raw . gsub! ( / \ [ #{ code } [^ \ ]]* \ ](.*?) \ [ \/ #{ code } \ ] /im , '\1' ) }
# ensure [/quote] are on their own line
raw . gsub! ( / \ s* \ [ \/ quote \ ] \ s* /im , " \n [/quote] \n " )
# [quote]
raw . gsub! ( / \ s* \ [quote (.+?) \ ] \ s /im ) {
params = $1
post_id = params [ / msg( \ d+) / , 1 ]
username = params [ / author=(.+) link= / , 1 ]
username = @old_to_new_usernames [ username ] if @old_to_new_usernames . has_key? ( username )
if t = topic_lookup_from_imported_post_id ( post_id )
%{ \n [quote=" #{ username } ,post: #{ t [ :post_number ] } ,topic: #{ t [ :topic_id ] } "] \n }
else
%{ \n [quote=" #{ username } "] \n }
end
}
# remove tapatalk mess
raw . gsub! ( / Sent from .+? using \ [url=.*? \ ].+? \ [ \/ url \ ] /i , " " )
raw . gsub! ( / Sent from .+? using .+? \ z /i , " " )
2018-07-19 19:17:43 +08:00
# clean URLs
raw . gsub! ( / \ [url=(.+?) \ ] \ 1 \ [ \/ url \ ] /i , '\1' )
2018-05-28 17:02:19 +08:00
raw
end
def mysql_query ( sql )
@client . query ( sql )
end
end
ImportScripts :: Smf1 . new . perform