# frozen_string_literal: true

require "mysql2"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require 'htmlentities'

require_relative 'vanilla_body_parser'
# Imports users, avatars, categories, topics, posts and private messages
# from a Vanilla Forums MySQL database into Discourse.
#
# Edit the connection constants below before running. Bodies are converted
# by VanillaBodyParser (configured in #initialize).
class ImportScripts::VanillaSQL < ImportScripts::Base

  VANILLA_DB = "vanilla_mysql"
  TABLE_PREFIX = "GDN_"
  ATTACHMENTS_BASE_DIR = nil # "/absolute/path/to/attachments" set the absolute path if you have attachments
  BATCH_SIZE = 1000
  CONVERT_HTML = true

  def initialize
    super
    @htmlentities = HTMLEntities.new
    @client = Mysql2::Client.new(
      host: "localhost",
      username: "root",
      password: "pa$$word",
      database: VANILLA_DB
    )

    VanillaBodyParser.configure(
      lookup: @lookup,
      uploader: @uploader,
      host: 'vanilla.yourforum.com', # your Vanilla forum domain
      uploads_path: 'uploads' # relative path to your vanilla uploads folder
    )

    # Only import tags when the source forum actually used them.
    # The probe query fails on older Vanilla schemas without a Tag table,
    # in which case we just skip tag import.
    @import_tags = false
    begin
      r = @client.query("select count(*) count from #{TABLE_PREFIX}Tag where countdiscussions > 0")
      @import_tags = true if r.first["count"].to_i > 0
    rescue => e
      puts "Tags won't be imported. #{e.message}"
    end
  end

  def execute
    if @import_tags
      SiteSetting.tagging_enabled = true
      SiteSetting.max_tags_per_topic = 10
    end

    import_users
    import_avatars
    import_categories
    import_topics
    import_posts
    import_messages

    update_tl0
    create_permalinks
  end

  # Imports GDN_User rows in batches keyed on UserID (resumable: skips
  # batches whose records already exist).
  def import_users
    puts '', "creating users"

    @user_is_deleted = false
    @last_deleted_username = nil
    username = nil
    @last_user_id = -1
    total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}User;").first['count']

    batches(BATCH_SIZE) do |offset|
      results = mysql_query(
        "SELECT UserID, Name, Title, Location, About, Email, Admin, Banned, CountComments,
           DateInserted, DateLastActive, InsertIPAddress
         FROM #{TABLE_PREFIX}User
         WHERE UserID > #{@last_user_id}
         ORDER BY UserID ASC
         LIMIT #{BATCH_SIZE};")

      break if results.size < 1

      @last_user_id = results.to_a.last['UserID']
      next if all_records_exist? :users, results.map { |u| u['UserID'].to_i }

      create_users(results, total: total_count, offset: offset) do |user|
        # Email can be NULL in Vanilla; safe-navigate so one bad row
        # doesn't abort the whole import.
        email = user['Email']&.squish

        next if email.blank?
        next if user['Name'].blank?
        next if @lookup.user_id_from_imported_user_id(user['UserID'])

        if user['Name'] == '[Deleted User]'
          # EVERY deleted user record in Vanilla has the same username: [Deleted User]
          # Save our UserNameSuggester some pain:
          @user_is_deleted = true
          username = @last_deleted_username || user['Name']
        else
          @user_is_deleted = false
          username = user['Name']
        end

        banned = user['Banned'] != 0
        commented = (user['CountComments'] || 0) > 0

        { id: user['UserID'],
          email: email,
          username: username,
          name: user['Name'],
          created_at: user['DateInserted'] == nil ? 0 : Time.zone.at(user['DateInserted']),
          bio_raw: user['About'],
          registration_ip_address: user['InsertIPAddress'],
          last_seen_at: user['DateLastActive'] == nil ? 0 : Time.zone.at(user['DateLastActive']),
          location: user['Location'],
          admin: user['Admin'] == 1,
          # Members who have commented and aren't banned start at TL2.
          trust_level: !banned && commented ? 2 : 0,
          post_create_action: proc do |newuser|
            if @user_is_deleted
              @last_deleted_username = newuser.username
            end

            if banned
              newuser.suspended_at = Time.now
              # banning on Vanilla doesn't have an end, so a thousand years seems equivalent
              newuser.suspended_till = 1000.years.from_now
              if newuser.save
                StaffActionLogger.new(Discourse.system_user).log_user_suspend(newuser, 'Imported from Vanilla Forum')
              else
                puts "Failed to suspend user #{newuser.username}. #{newuser.errors.full_messages.join(', ')}"
              end
            end
          end }
      end
    end
  end

  # Copies avatar files from ATTACHMENTS_BASE_DIR onto imported users.
  # No-op unless ATTACHMENTS_BASE_DIR is set and exists.
  def import_avatars
    if ATTACHMENTS_BASE_DIR && File.exist?(ATTACHMENTS_BASE_DIR)
      puts "", "importing user avatars"

      User.find_each do |u|
        next unless u.custom_fields["import_id"]

        r = mysql_query("SELECT photo FROM #{TABLE_PREFIX}User WHERE UserID = #{u.custom_fields['import_id']};").first
        next if r.nil?
        photo = r["photo"]
        next unless photo.present?

        # Possible encoded values:
        # 1. cf://uploads/userpics/820/Y0AFUQYYM6QN.jpg
        # 2. ~cf/userpics2/cf566487133f1f538e02da96f9a16b18.jpg
        # 3. ~cf/userpics/txkt8kw1wozn.jpg

        photo_real_filename = nil
        parts = photo.squeeze("/").split("/")
        if parts[0] == "cf:"
          photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[2..-2].join('/')}".squeeze("/")
        elsif parts[0] == "~cf"
          photo_path = "#{ATTACHMENTS_BASE_DIR}/#{parts[1..-2].join('/')}".squeeze("/")
        else
          puts "UNKNOWN FORMAT: #{photo}"
          next
        end

        if !File.exist?(photo_path)
          puts "Path to avatar file not found! Skipping. #{photo_path}"
          next
        end

        photo_real_filename = find_photo_file(photo_path, parts.last)
        if photo_real_filename.nil?
          puts "Couldn't find file for #{photo}. Skipping."
          next
        end

        print "."

        upload = create_upload(u.id, photo_real_filename, File.basename(photo_real_filename))
        if upload.persisted?
          # Toggle import_mode off so create_user_avatar runs its normal hooks.
          u.import_mode = false
          u.create_user_avatar
          u.import_mode = true
          u.user_avatar.update(custom_upload_id: upload.id)
          u.update(uploaded_avatar_id: upload.id)
        else
          puts "Error: Upload did not persist for #{u.username} #{photo_real_filename}!"
        end
      end
    end
  end

  # Locates the avatar file on disk: exact name first, then the Vanilla
  # size-prefixed variants. Returns the full path or nil.
  def find_photo_file(path, base_filename)
    base_guess = base_filename.dup
    full_guess = File.join(path, base_guess) # often an exact match exists

    return full_guess if File.exist?(full_guess)

    # Otherwise, the file exists but with a prefix:
    # The p prefix seems to be the full file, so try to find that one first.
    ['p', 't', 'n'].each do |prefix|
      full_guess = File.join(path, "#{prefix}#{base_guess}")
      return full_guess if File.exist?(full_guess)
    end

    # Didn't find it.
    nil
  end

  def import_categories
    puts "", "importing categories..."

    categories = mysql_query("
                              SELECT CategoryID, Name, Description
                              FROM #{TABLE_PREFIX}Category
                              ORDER BY CategoryID ASC
                            ").to_a

    create_categories(categories) do |category|
      {
        id: category['CategoryID'],
        name: CGI.unescapeHTML(category['Name']),
        description: CGI.unescapeHTML(category['Description'])
      }
    end
  end

  # Imports GDN_Discussion rows as topics (import id "discussion#<id>"),
  # applying tags after each topic is created when tag import is enabled.
  def import_topics
    puts "", "importing topics..."

    # Use TABLE_PREFIX for consistency with every other query in this script.
    tag_names_sql = "select t.name as tag_name from #{TABLE_PREFIX}Tag t, #{TABLE_PREFIX}TagDiscussion td where t.tagid = td.tagid and td.discussionid = {discussionid} and t.name != '';"

    total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Discussion;").first['count']

    @last_topic_id = -1

    batches(BATCH_SIZE) do |offset|
      discussions = mysql_query(
        "SELECT DiscussionID, CategoryID, Name, Body, Format, CountViews, Closed, Announce,
           DateInserted, InsertUserID, DateLastComment
         FROM #{TABLE_PREFIX}Discussion
         WHERE DiscussionID > #{@last_topic_id}
         ORDER BY DiscussionID ASC
         LIMIT #{BATCH_SIZE};")

      break if discussions.size < 1

      @last_topic_id = discussions.to_a.last['DiscussionID']
      next if all_records_exist? :posts, discussions.map { |t| "discussion#" + t['DiscussionID'].to_s }

      create_posts(discussions, total: total_count, offset: offset) do |discussion|
        user_id = user_id_from_imported_user_id(discussion['InsertUserID']) || Discourse::SYSTEM_USER_ID
        {
          id: "discussion#" + discussion['DiscussionID'].to_s,
          user_id: user_id,
          title: discussion['Name'],
          category: category_id_from_imported_category_id(discussion['CategoryID']),
          raw: VanillaBodyParser.new(discussion, user_id).parse,
          views: discussion['CountViews'] || 0,
          closed: discussion['Closed'] == 1,
          # Announce == 1 pins globally; any other non-zero value pins in-category.
          pinned_at: discussion['Announce'] == 0 ? nil : Time.zone.at(discussion['DateLastComment'] || discussion['DateInserted']),
          pinned_globally: discussion['Announce'] == 1,
          created_at: Time.zone.at(discussion['DateInserted']),
          post_create_action: proc do |post|
            if @import_tags
              tag_names = @client.query(tag_names_sql.gsub('{discussionid}', discussion['DiscussionID'].to_s)).map { |row| row['tag_name'] }
              DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
            end
          end
        }
      end
    end
  end

  # Imports GDN_Comment rows as replies (import id "comment#<id>"),
  # attaching each to its previously imported discussion topic.
  def import_posts
    puts "", "importing posts..."

    total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Comment;").first['count']

    @last_post_id = -1

    batches(BATCH_SIZE) do |offset|
      comments = mysql_query(
        "SELECT CommentID, DiscussionID, Body, Format,
           DateInserted, InsertUserID
         FROM #{TABLE_PREFIX}Comment
         WHERE CommentID > #{@last_post_id}
         ORDER BY CommentID ASC
         LIMIT #{BATCH_SIZE};")

      break if comments.size < 1

      @last_post_id = comments.to_a.last['CommentID']
      next if all_records_exist? :posts, comments.map { |comment| "comment#" + comment['CommentID'].to_s }

      create_posts(comments, total: total_count, offset: offset) do |comment|
        next unless t = topic_lookup_from_imported_post_id("discussion#" + comment['DiscussionID'].to_s)
        next if comment['Body'].blank?

        user_id = user_id_from_imported_user_id(comment['InsertUserID']) || Discourse::SYSTEM_USER_ID
        {
          id: "comment#" + comment['CommentID'].to_s,
          user_id: user_id,
          topic_id: t[:topic_id],
          raw: VanillaBodyParser.new(comment, user_id).parse,
          created_at: Time.zone.at(comment['DateInserted'])
        }
      end
    end
  end

  # Imports Vanilla conversations as private messages. The first message of
  # a conversation creates the PM topic ("conversation#<id>"); subsequent
  # messages ("message#<id>") are posted into that topic.
  def import_messages
    puts "", "importing messages..."

    total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}ConversationMessage;").first['count']

    @last_message_id = -1

    batches(BATCH_SIZE) do |offset|
      messages = mysql_query(
        "SELECT m.MessageID, m.Body, m.Format,
           m.InsertUserID, m.DateInserted,
           m.ConversationID, c.Contributors
         FROM #{TABLE_PREFIX}ConversationMessage m
         INNER JOIN #{TABLE_PREFIX}Conversation c on c.ConversationID = m.ConversationID
         WHERE m.MessageID > #{@last_message_id}
         ORDER BY m.MessageID ASC
         LIMIT #{BATCH_SIZE};")

      break if messages.size < 1
      @last_message_id = messages.to_a.last['MessageID']
      next if all_records_exist? :posts, messages.map { |t| "message#" + t['MessageID'].to_s }

      create_posts(messages, total: total_count, offset: offset) do |message|
        user_id = user_id_from_imported_user_id(message['InsertUserID']) || Discourse::SYSTEM_USER_ID
        body = VanillaBodyParser.new(message, user_id).parse

        common = {
          user_id: user_id,
          raw: body,
          created_at: Time.zone.at(message['DateInserted']),
          custom_fields: {
            conversation_id: message['ConversationID'],
            participants: message['Contributors'],
            message_id: message['MessageID']
          }
        }

        conversation_id = "conversation#" + message['ConversationID'].to_s
        message_id = "message#" + message['MessageID'].to_s

        imported_conversation = topic_lookup_from_imported_post_id(conversation_id)

        if imported_conversation.present?
          common.merge(id: message_id, topic_id: imported_conversation[:topic_id])
        else
          # Contributors is serialized like ["123","456"]; pull the quoted ids.
          user_ids = (message['Contributors'] || '').scan(/\"(\d+)\"/).flatten.map(&:to_i)
          usernames = user_ids.map { |id| @lookup.find_user_by_import_id(id).try(:username) }.compact
          # Fall back to the sender when no contributor could be resolved.
          usernames = [@lookup.find_user_by_import_id(message['InsertUserID']).try(:username)].compact if usernames.empty?

          title = body.truncate(40)

          {
            id: conversation_id,
            title: title,
            archetype: Archetype.private_message,
            target_usernames: usernames.uniq,
          }.merge(common)
        end
      end
    end
  end

  def staff_guardian
    @_staff_guardian ||= Guardian.new(Discourse.system_user)
  end

  def mysql_query(sql)
    @client.query(sql)
    # @client.query(sql, cache_rows: false) #segfault: cache_rows: false causes segmentation fault
  end

  # Creates Permalink records mapping old Vanilla URLs (profiles,
  # discussions, comments) to their imported Discourse counterparts.
  # `rescue nil` swallows duplicate-permalink errors on re-runs.
  def create_permalinks
    puts '', 'Creating redirects...', ''

    User.find_each do |u|
      ucf = u.custom_fields
      if ucf && ucf["import_id"] && ucf["import_username"]
        # Vanilla encodes spaces as %20 in profile URLs, not '+'.
        encoded_username = CGI.escape(ucf['import_username']).gsub('+', '%20')
        Permalink.create(url: "profile/#{ucf['import_id']}/#{encoded_username}", external_url: "/users/#{u.username}") rescue nil
        print '.'
      end
    end

    Post.find_each do |post|
      pcf = post.custom_fields
      if pcf && pcf["import_id"]
        topic = post.topic
        id = pcf["import_id"].split('#').last
        if post.post_number == 1
          slug = Slug.for(topic.title) # probably matches what vanilla would do...
          Permalink.create(url: "discussion/#{id}/#{slug}", topic_id: topic.id) rescue nil
        else
          Permalink.create(url: "discussion/comment/#{id}", post_id: post.id) rescue nil
        end
        print '.'
      end
    end
  end

end
# Kick off the import when this script is run directly.
ImportScripts::VanillaSQL.new.perform