2019-05-03 06:17:27 +08:00
# frozen_string_literal: true
2015-06-29 20:45:52 +08:00
require " mysql2 "
require File . expand_path ( File . dirname ( __FILE__ ) + " /base.rb " )
require 'htmlentities'
2021-03-12 00:21:56 +08:00
require 'reverse_markdown'
2020-08-25 04:19:57 +08:00
require_relative 'vanilla_body_parser'
2015-06-29 20:45:52 +08:00
class ImportScripts :: VanillaSQL < ImportScripts :: Base
2021-03-12 00:21:56 +08:00
# Name of the MySQL database that holds the Vanilla data to import.
VANILLA_DB = "vanilla"
# Prefix prepended to every Vanilla table name in the queries below
# (e.g. "#{TABLE_PREFIX}User"); it must not contain surrounding whitespace.
TABLE_PREFIX = "GDN_"
ATTACHMENTS_BASE_DIR = nil # "/absolute/path/to/attachments" set the absolute path if you have attachments
# Number of rows fetched per query by the batched import steps.
BATCH_SIZE = 1000
# NOTE(review): not referenced by any code visible in this file — confirm before removing.
CONVERT_HTML = true
2015-06-29 20:45:52 +08:00
# Prepares the importer: creates the HTML-entity decoder and the MySQL client
# for VANILLA_DB, configures VanillaBodyParser, detects whether tags should be
# imported and initialises the deep-category mapping.
def initialize
  super

  @htmlentities = HTMLEntities.new
  @client = Mysql2::Client.new(
    host: "localhost",
    username: "root",
    database: VANILLA_DB
  )

  # by default, don't use the body parser as it's not pertinent to all versions
  @vb_parser = false

  VanillaBodyParser.configure(
    lookup: @lookup,
    uploader: @uploader,
    host: 'forum.example.com', # your Vanilla forum domain
    uploads_path: 'uploads' # relative path to your vanilla uploads folder
  )

  # Tags are imported only when the Tag table can be queried and at least one
  # tag is attached to a discussion; a failing query just disables the step.
  @import_tags = false
  begin
    tagged = @client.query("select count(*) count from #{TABLE_PREFIX}Tag where countdiscussions > 0")
    @import_tags = true if tagged.first["count"].to_i > 0
  rescue => e
    puts "Tags won't be imported. #{e.message}"
  end

  # Vanilla CategoryID => { category_id:, tag: }; filled by import_categories
  # for categories that are imported as tags instead of categories.
  @category_mappings = {}
end
# Runs every import step in order.
#
# When tag import was enabled in #initialize, tagging is switched on in the
# site settings first. mark_topics_as_solved is invoked exactly once, after
# all posts and attachments have been processed (the statement it runs only
# inserts rows that do not exist yet, so a second call added nothing).
def execute
  if @import_tags
    SiteSetting.tagging_enabled = true
    SiteSetting.max_tags_per_topic = 10
  end

  import_groups
  import_users
  import_avatars
  import_group_users
  import_categories
  import_topics
  import_posts
  import_likes
  import_messages

  update_tl0
  create_permalinks
  import_attachments
  mark_topics_as_solved
end
# Imports every Vanilla role as a group.
#
# Reads RoleID and Name from the Role table and hands each row to
# create_groups, using the role id as import id and the HTML-entity-decoded,
# stripped role name as group name.
def import_groups
  puts "", "importing groups..."

  groups = mysql_query <<~SQL
    SELECT RoleID, Name
    FROM #{TABLE_PREFIX}Role
    ORDER BY RoleID
  SQL

  create_groups(groups) do |group|
    {
      id: group["RoleID"],
      name: @htmlentities.decode(group["Name"]).strip
    }
  end
end
# Imports Vanilla users in batches of BATCH_SIZE, paging by UserID.
#
# Rows without an email or a name, and users that were already imported, are
# skipped. Banned users are created and then suspended; users that are not
# banned and have at least one comment start at trust level 2, all others at 0.
def import_users
  puts '', "creating users"

  @user_is_deleted = false
  @last_deleted_username = nil
  @last_user_id = -1
  total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}User;").first['count']

  batches(BATCH_SIZE) do |offset|
    results = mysql_query(
      "SELECT UserID, Name, Title, Location, About, Email, Admin, Banned, CountComments,
              DateInserted, DateLastActive, InsertIPAddress
       FROM #{TABLE_PREFIX}User
       WHERE UserID > #{@last_user_id}
       ORDER BY UserID ASC
       LIMIT #{BATCH_SIZE};")

    break if results.size < 1
    @last_user_id = results.to_a.last['UserID']
    next if all_records_exist?(:users, results.map { |u| u['UserID'].to_i })

    create_users(results, total: total_count, offset: offset) do |user|
      # to_s: a NULL email must be skipped, not crash on nil.squish
      email = user['Email'].to_s.squish
      next if email.blank?
      next if user['Name'].blank?
      next if @lookup.user_id_from_imported_user_id(user['UserID'])

      # EVERY deleted user record in Vanilla has the same username: [Deleted User]
      # Save our UserNameSuggester some pain by starting from the username
      # that was generated for the previous deleted user.
      @user_is_deleted = user['Name'] == '[Deleted User]'
      username = @user_is_deleted ? (@last_deleted_username || user['Name']) : user['Name']

      # A NULL (or false) Banned value means "not banned"; comparing nil with
      # `!= 0` alone would be true and suspend the user.
      banned = user['Banned'].present? && user['Banned'] != 0
      commented = (user['CountComments'] || 0) > 0

      {
        id: user['UserID'],
        email: email,
        username: username,
        name: user['Name'],
        created_at: user['DateInserted'].nil? ? 0 : Time.zone.at(user['DateInserted']),
        bio_raw: user['About'],
        registration_ip_address: user['InsertIPAddress'],
        last_seen_at: user['DateLastActive'].nil? ? 0 : Time.zone.at(user['DateLastActive']),
        location: user['Location'],
        admin: user['Admin'] == 1,
        trust_level: !banned && commented ? 2 : 0,
        post_create_action: proc do |newuser|
          @last_deleted_username = newuser.username if @user_is_deleted

          if banned
            newuser.suspended_at = Time.now
            # banning on Vanilla doesn't have an end, so a thousand years seems equivalent
            newuser.suspended_till = 1000.years.from_now
            if newuser.save
              StaffActionLogger.new(Discourse.system_user).log_user_suspend(newuser, 'Imported from Vanilla Forum')
            else
              puts "Failed to suspend user #{newuser.username}. #{newuser.errors.full_messages.join(', ')}"
            end
          end
        end
      }
    end
  end
end
2016-04-02 03:57:20 +08:00
# Imports user avatars from ATTACHMENTS_BASE_DIR.
#
# Does nothing unless ATTACHMENTS_BASE_DIR is set and exists. For every user
# with an "import_id" custom field, the Vanilla photo reference is resolved to
# a file on disk (see find_photo_file), uploaded, and set as the user's avatar.
def import_avatars
  return unless ATTACHMENTS_BASE_DIR && File.exist?(ATTACHMENTS_BASE_DIR)

  puts "", "importing user avatars"

  User.find_each do |u|
    import_id = u.custom_fields["import_id"]
    next unless import_id

    # to_i keeps a non-numeric import id from being spliced into the SQL
    r = mysql_query("SELECT photo FROM #{TABLE_PREFIX}User WHERE UserID = #{import_id.to_i};").first
    next if r.nil?
    photo = r["photo"]
    next unless photo.present?

    # Possible encoded values:
    # 1. cf://uploads/userpics/820/Y0AFUQYYM6QN.jpg
    # 2. ~cf/userpics2/cf566487133f1f538e02da96f9a16b18.jpg
    # 3. ~cf/userpics/txkt8kw1wozn.jpg
    parts = photo.squeeze("/").split("/")
    scheme = parts.first.to_s

    if scheme =~ /^[a-z0-9]{2}:/
      # drop the "xx:" scheme and the first path segment, keep the directories
      photo_path = "#{ATTACHMENTS_BASE_DIR}/#{Array(parts[2..-2]).join('/')}".squeeze("/")
    elsif scheme == "~cf"
      photo_path = "#{ATTACHMENTS_BASE_DIR}/#{Array(parts[1..-2]).join('/')}".squeeze("/")
    else
      puts "UNKNOWN FORMAT: #{photo}"
      next
    end

    unless File.exist?(photo_path)
      puts "Path to avatar file not found! Skipping. #{photo_path}"
      next
    end

    photo_real_filename = find_photo_file(photo_path, parts.last)
    if photo_real_filename.nil?
      puts "Couldn't find file for #{photo}. Skipping."
      next
    end

    print "."

    upload = create_upload(u.id, photo_real_filename, File.basename(photo_real_filename))
    # `&.`: treat a missing upload like a failed one (import_attachments
    # guards against a nil upload the same way)
    if upload&.persisted?
      u.import_mode = false
      u.create_user_avatar
      u.import_mode = true
      u.user_avatar.update(custom_upload_id: upload.id)
      u.update(uploaded_avatar_id: upload.id)
    else
      puts "Error: Upload did not persist for #{u.username} #{photo_real_filename}!"
    end
  end
end
# Locates the avatar file for +base_filename+ inside directory +path+.
#
# Often an exact match exists. Otherwise the file exists with a one-letter
# prefix; the "p" prefix seems to be the full file, so it is tried first,
# followed by "t" and "n".
#
# Returns the full path of the first candidate that exists, or nil when none
# of them does.
def find_photo_file(path, base_filename)
  ['', 'p', 't', 'n']
    .map { |prefix| File.join(path, "#{prefix}#{base_filename}") }
    .find { |candidate| File.exist?(candidate) }
end
2021-01-14 01:40:00 +08:00
# Adds imported users to their imported groups.
#
# Each UserRole row is translated through the import-id lookups; a GroupUser
# is found or created only when both the user and the group were imported.
def import_group_users
  puts "", "importing group users..."

  group_users = mysql_query("
    SELECT RoleID, UserID
    FROM #{TABLE_PREFIX}UserRole
  ").to_a

  group_users.each do |row|
    user_id = user_id_from_imported_user_id(row["UserID"])
    group_id = group_id_from_imported_group_id(row["RoleID"])
    next unless user_id && group_id

    GroupUser.find_or_create_by(user_id: user_id, group_id: group_id)
  end
end
2015-06-29 20:45:52 +08:00
# Imports Vanilla categories.
#
# * Categories whose ParentCategoryID is blank or -1 become top-level categories.
# * Their direct children become subcategories.
# * Anything nested deeper becomes a tag: @category_mappings records, per
#   Vanilla CategoryID, the nearest imported subcategory ancestor and the tag
#   name, for import_topics to use.
def import_categories
  puts "", "importing categories..."

  categories = mysql_query("
    SELECT CategoryID, ParentCategoryID, Name, Description
    FROM #{TABLE_PREFIX}Category
    WHERE CategoryID > 0
    ORDER BY CategoryID ASC
  ").to_a

  top_level_categories = categories.select { |c| c['ParentCategoryID'].blank? || c['ParentCategoryID'] == -1 }

  create_categories(top_level_categories) do |category|
    {
      id: category['CategoryID'],
      name: CGI.unescapeHTML(category['Name']),
      # a NULL description must not be passed to unescapeHTML
      description: category['Description'] ? CGI.unescapeHTML(category['Description']) : nil
    }
  end

  top_level_category_ids = Set.new(top_level_categories.map { |c| c["CategoryID"] })

  subcategories = categories.select { |c| top_level_category_ids.include?(c["ParentCategoryID"]) }

  # Depth = 3
  create_categories(subcategories) do |category|
    {
      id: category['CategoryID'],
      parent_category_id: category_id_from_imported_category_id(category['ParentCategoryID']),
      name: CGI.unescapeHTML(category['Name']),
      description: category['Description'] ? CGI.unescapeHTML(category['Description']) : nil,
    }
  end

  subcategory_ids = Set.new(subcategories.map { |c| c['CategoryID'] })
  categories_by_id = categories.to_h { |c| [c['CategoryID'], c] }

  # Depth 4 and 5 need to be tags
  categories.each do |c|
    next if c['ParentCategoryID'] == -1
    next if top_level_category_ids.include?(c['CategoryID'])
    next if subcategory_ids.include?(c['CategoryID'])

    # Find a depth 3 category for topics in this category
    parent = c
    seen = Set.new
    while parent && !subcategory_ids.include?(parent['CategoryID'])
      # `seen` stops the walk if the parent chain refers back to itself
      parent = seen.add?(parent['CategoryID']) ? categories_by_id[parent['ParentCategoryID']] : nil
    end

    if parent
      tag_name = DiscourseTagging.clean_tag(c['Name'])
      tag = Tag.find_by_name(tag_name) || Tag.create(name: tag_name)
      @category_mappings[c['CategoryID']] = {
        category_id: category_id_from_imported_category_id(parent['CategoryID']),
        tag: tag[:name]
      }
    else
      puts '', "Couldn't find a category for #{c['CategoryID']} '#{c['Name']}'!"
    end
  end
end
# Imports Vanilla discussions as topics, in batches of BATCH_SIZE paged by
# DiscussionID.
#
# Each topic gets the import id "discussion#<DiscussionID>". The category is
# the imported category or, for categories that import_categories turned into
# tags, the mapped ancestor category. When tag import is enabled, the
# discussion's Vanilla tags plus the mapped category tag are applied after the
# post has been created.
def import_topics
  puts "", "importing topics..."

  # Uses TABLE_PREFIX like every other query so a non-default prefix works too.
  tag_names_sql = "select t.name as tag_name from #{TABLE_PREFIX}Tag t, #{TABLE_PREFIX}TagDiscussion td where t.tagid = td.tagid and td.discussionid = {discussionid} and t.name != '';"

  total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Discussion;").first['count']

  @last_topic_id = -1

  batches(BATCH_SIZE) do |offset|
    discussions = mysql_query(
      "SELECT DiscussionID, CategoryID, Name, Body, Format, CountViews, Closed, Announce,
              DateInserted, InsertUserID, DateLastComment
       FROM #{TABLE_PREFIX}Discussion
       WHERE DiscussionID > #{@last_topic_id}
       ORDER BY DiscussionID ASC
       LIMIT #{BATCH_SIZE};")

    break if discussions.size < 1
    @last_topic_id = discussions.to_a.last['DiscussionID']
    next if all_records_exist?(:posts, discussions.map { |t| "discussion#" + t['DiscussionID'].to_s })

    create_posts(discussions, total: total_count, offset: offset) do |discussion|
      user_id = user_id_from_imported_user_id(discussion['InsertUserID']) || Discourse::SYSTEM_USER_ID
      vanilla_category_id = discussion['CategoryID']

      {
        id: "discussion#" + discussion['DiscussionID'].to_s,
        user_id: user_id,
        title: discussion['Name'],
        category: category_id_from_imported_category_id(vanilla_category_id) ||
          @category_mappings.dig(vanilla_category_id, :category_id),
        raw: get_raw(discussion, user_id),
        views: discussion['CountViews'] || 0,
        closed: discussion['Closed'] == 1,
        pinned_at: discussion['Announce'] == 0 ? nil : Time.zone.at(discussion['DateLastComment'] || discussion['DateInserted']),
        pinned_globally: discussion['Announce'] == 1,
        created_at: Time.zone.at(discussion['DateInserted']),
        post_create_action: proc do |post|
          if @import_tags
            sql = tag_names_sql.gsub('{discussionid}', discussion['DiscussionID'].to_s)
            tag_names = @client.query(sql).map { |row| row['tag_name'] }
            category_tag = @category_mappings.dig(vanilla_category_id, :tag)
            tag_names << category_tag if category_tag
            DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tag_names)
          end
        end
      }
    end
  end
end
# Imports Vanilla comments as replies, in batches of BATCH_SIZE paged by
# CommentID.
#
# A comment is skipped when its discussion was not imported or its body is
# blank. Comments whose QnA column is "Accepted" get the is_accepted_answer
# custom field, which mark_topics_as_solved later turns into a solved topic.
#
# NOTE(review): the query selects a QnA column — presumably added by Vanilla's
# Q&A plugin; confirm it exists in the source schema.
def import_posts
  puts "", "importing posts..."

  total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}Comment;").first['count']

  @last_post_id = -1

  batches(BATCH_SIZE) do |offset|
    comments = mysql_query(
      "SELECT CommentID, DiscussionID, Body, Format,
              DateInserted, InsertUserID, QnA
       FROM #{TABLE_PREFIX}Comment
       WHERE CommentID > #{@last_post_id}
       ORDER BY CommentID ASC
       LIMIT #{BATCH_SIZE};")

    break if comments.size < 1
    @last_post_id = comments.to_a.last['CommentID']
    next if all_records_exist?(:posts, comments.map { |comment| "comment#" + comment['CommentID'].to_s })

    create_posts(comments, total: total_count, offset: offset) do |comment|
      topic = topic_lookup_from_imported_post_id("discussion#" + comment['DiscussionID'].to_s)
      next unless topic
      next if comment['Body'].blank?

      user_id = user_id_from_imported_user_id(comment['InsertUserID']) || Discourse::SYSTEM_USER_ID

      post = {
        id: "comment#" + comment['CommentID'].to_s,
        user_id: user_id,
        topic_id: topic[:topic_id],
        raw: get_raw(comment, user_id),
        created_at: Time.zone.at(comment['DateInserted'])
      }
      post[:custom_fields] = { is_accepted_answer: true } if comment['QnA'] == "Accepted"

      post
    end
  end
end
2021-03-12 00:21:56 +08:00
# Imports rows of the ThanksLog table as likes on the matching imported
# comments.
#
# A row is ignored when its comment or user was not imported, or when the
# mapped post/user record no longer exists. Errors raised while creating a
# like are printed and the import continues.
def import_likes
  puts "", "importing likes..."

  # Both queries go through TABLE_PREFIX so they always hit the same table.
  total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}ThanksLog;").first['count']
  current_count = 0
  start_time = Time.now

  likes = mysql_query("
    SELECT CommentID, DateInserted, InsertUserID
    FROM #{TABLE_PREFIX}ThanksLog
    ORDER BY CommentID ASC;
  ")

  likes.each do |like|
    post_id = post_id_from_imported_post_id("comment#" + like['CommentID'].to_s)
    user_id = user_id_from_imported_user_id(like['InsertUserID'])

    # find_by returns nil for a missing record instead of raising, so one
    # stale mapping cannot abort the whole step
    post = Post.find_by(id: post_id) if post_id
    user = User.find_by(id: user_id) if user_id

    if post && user
      begin
        PostActionCreator.like(user, post)
      rescue => e
        puts "error adding like to post #{e}"
      end
    end

    current_count += 1
    print_status(current_count, total_count, start_time)
  end
end
2020-08-25 04:19:57 +08:00
# Imports Vanilla conversation messages as private messages, in batches of
# BATCH_SIZE paged by MessageID.
#
# The first message seen for a conversation creates the private-message topic
# (import id "conversation#<ConversationID>"), addressed to the users found in
# the conversation's Contributors column, falling back to the author. Later
# messages become replies in that topic (import id "message#<MessageID>").
def import_messages
  puts "", "importing messages..."

  total_count = mysql_query("SELECT count(*) count FROM #{TABLE_PREFIX}ConversationMessage;").first['count']

  @last_message_id = -1

  batches(BATCH_SIZE) do |offset|
    messages = mysql_query(
      "SELECT m.MessageID, m.Body, m.Format,
              m.InsertUserID, m.DateInserted,
              m.ConversationID, c.Contributors
       FROM #{TABLE_PREFIX}ConversationMessage m
       INNER JOIN #{TABLE_PREFIX}Conversation c on c.ConversationID = m.ConversationID
       WHERE m.MessageID > #{@last_message_id}
       ORDER BY m.MessageID ASC
       LIMIT #{BATCH_SIZE};")

    break if messages.size < 1
    @last_message_id = messages.to_a.last['MessageID']
    next if all_records_exist?(:posts, messages.map { |t| "message#" + t['MessageID'].to_s })

    create_posts(messages, total: total_count, offset: offset) do |message|
      user_id = user_id_from_imported_user_id(message['InsertUserID']) || Discourse::SYSTEM_USER_ID
      body = get_raw(message, user_id)

      common = {
        user_id: user_id,
        raw: body,
        created_at: Time.zone.at(message['DateInserted']),
        custom_fields: {
          conversation_id: message['ConversationID'],
          participants: message['Contributors'],
          message_id: message['MessageID']
        }
      }

      conversation_id = "conversation#" + message['ConversationID'].to_s
      message_id = "message#" + message['MessageID'].to_s

      imported_conversation = topic_lookup_from_imported_post_id(conversation_id)

      if imported_conversation.present?
        common.merge(id: message_id, topic_id: imported_conversation[:topic_id])
      else
        # Contributors holds the participant ids as quoted numbers
        user_ids = (message['Contributors'] || '').scan(/"(\d+)"/).flatten.map(&:to_i)
        usernames = user_ids.map { |id| @lookup.find_user_by_import_id(id).try(:username) }.compact
        usernames = [@lookup.find_user_by_import_id(message['InsertUserID']).try(:username)].compact if usernames.empty?

        {
          id: conversation_id,
          # to_s: get_raw can return nil for a NULL body
          title: body.to_s.truncate(40),
          archetype: Archetype.private_message,
          target_usernames: usernames.uniq,
        }.merge(common)
      end
    end
  end
end
2021-08-12 17:37:44 +08:00
# Converts a Vanilla record (discussion, comment or message) to post raw text
# according to its Format column (compared case-insensitively):
#
# * "html"     -> process_raw (entities decoded, HTML converted to Markdown)
# * "rich"     -> VanillaBodyParser
# * "markdown" -> process_raw without the HTML-to-Markdown conversion
# * other/none -> VanillaBodyParser when @vb_parser is set, else process_raw
#
# record  - row hash with at least the 'Format' and 'Body' keys
# user_id - id of the user the post is created for (passed on to the parser)
def get_raw(record, user_id)
  body = record['Body']

  case record['Format'].to_s.downcase
  when "html"
    process_raw(body)
  when "rich"
    VanillaBodyParser.new(record, user_id).parse
  when "markdown"
    process_raw(body, skip_reverse_markdown: true)
  else
    @vb_parser ? VanillaBodyParser.new(record, user_id).parse : process_raw(body)
  end
end
# Cleans up a Vanilla body for use as post raw text.
#
# HTML entities are decoded, links whose text is a user mention
# (<a ...>@name</a>) are reduced to the bare mention, the result is converted
# from HTML to Markdown unless +skip_reverse_markdown+ is true, and invalid
# byte sequences are scrubbed.
#
# Returns the processed String, or nil when +raw+ is nil.
def process_raw(raw, skip_reverse_markdown: false)
  return if raw.nil?

  text = @htmlentities.decode(raw)

  # convert user profile links to user mentions; `[^>]*` keeps the match
  # inside a single <a ...> tag so an earlier link on the same line is not
  # swallowed together with the text in between
  text = text.gsub(/<a\b[^>]*>(@\S+?)<\/a>/, '\1')

  text = ReverseMarkdown.convert(text) unless skip_reverse_markdown
  text.scrub
end
2016-06-28 04:17:00 +08:00
# Returns a Guardian for the system user, created on first use and reused
# afterwards.
def staff_guardian
  @_staff_guardian ||= Guardian.new(Discourse.system_user)
end
2015-06-29 20:45:52 +08:00
# Runs +sql+ on the Vanilla MySQL connection and returns the client's result.
def mysql_query(sql)
  # Do not pass `cache_rows: false` here: it causes a segmentation fault.
  @client.query(sql)
end
2016-05-17 23:16:41 +08:00
# Creates permalinks so old Vanilla URLs redirect to the imported content:
#
# * profile/<import id>/<username>   -> the user's profile
# * discussion/<id>/<slug>           -> the topic (first posts)
# * discussion/comment/<id>          -> the post (replies)
#
# Posts imported from private conversations are skipped: their import ids
# ("conversation#…", "message#…") number a different Vanilla table, so they
# must not produce discussion URLs. Creation is best effort; a permalink that
# cannot be created is silently ignored.
def create_permalinks
  puts '', 'Creating redirects...', ''

  User.find_each do |u|
    ucf = u.custom_fields
    next unless ucf && ucf["import_id"] && ucf["import_username"]

    encoded_username = CGI.escape(ucf['import_username']).gsub('+', '%20')
    Permalink.create(url: "profile/#{ucf['import_id']}/#{encoded_username}", external_url: "/users/#{u.username}") rescue nil
    print '.'
  end

  Post.find_each do |post|
    pcf = post.custom_fields
    next unless pcf && pcf["import_id"]

    import_id = pcf["import_id"].to_s
    next if import_id.start_with?("conversation#", "message#")

    id = import_id.split('#').last

    if post.post_number == 1
      topic = post.topic
      next unless topic # the topic record is gone; nothing to link to

      slug = Slug.for(topic.title) # probably matches what vanilla would do...
      Permalink.create(url: "discussion/#{id}/#{slug}", topic_id: topic.id) rescue nil
    else
      Permalink.create(url: "discussion/comment/#{id}", post_id: post.id) rescue nil
    end
    print '.'
  end
end
2021-01-14 01:40:00 +08:00
# Replaces attachment references in imported posts with uploads created from
# the files under ATTACHMENTS_BASE_DIR. Does nothing unless that directory is
# set and exists.
#
# Two kinds of references are handled:
# * [attachment=<MediaID>:<name>] markers, resolved through the Media table
# * CDN URLs matching cdn_regex, resolved by their path below "uploads/"
#
# When a reference cannot be resolved (no Media row, file missing, upload
# failed) the original text is left in the post unchanged. Posts whose raw
# changed are revised without creating a revision or bumping the topic.
#
# NOTE(review): html_for_upload and normalize_text are not defined in the
# visible part of this file — presumably inherited; confirm.
def import_attachments
  return unless ATTACHMENTS_BASE_DIR && File.exist?(ATTACHMENTS_BASE_DIR)

  puts "", "importing attachments"

  start = Time.now
  count = 0

  # https://us.v-cdn.net/1234567/uploads/editor/xyz/image.jpg
  cdn_regex = /https:\/\/us.v-cdn.net\/1234567\/uploads\/(\S+\/(\w|-)+.\w+)/i
  # [attachment=10109:Screen Shot 2012-04-01 at 3.47.35 AM.png]
  attachment_regex = /\[attachment=(\d+):(.*?)\]/i

  Post.where("raw LIKE '%/us.v-cdn.net/%' OR raw LIKE '%[attachment%'").find_each do |post|
    count += 1
    print "\r%7d - %6d/sec" % [count, count.to_f / (Time.now - start)]

    # In both blocks below the value of the block replaces the match, so every
    # failure path returns the matched text itself (`next s` / `s`); returning
    # nil would erase the reference from the post.
    new_raw = post.raw.gsub(attachment_regex) do |s|
      matches = attachment_regex.match(s)
      attachment_id = matches[1]
      file_name = matches[2]

      r = mysql_query("SELECT Path, Name FROM #{TABLE_PREFIX}Media WHERE MediaID = #{attachment_id};").first
      next s if r.nil?
      path = r["Path"]
      name = r["Name"]
      next s unless path.present?

      path = path.gsub("s3://content/", "").gsub("s3://uploads/", "")
      file_path = "#{ATTACHMENTS_BASE_DIR}/#{path}"

      unless File.exist?(file_path)
        puts "Couldn't find file for #{attachment_id}. Skipping."
        next s
      end

      upload = create_upload(post.user.id, file_path, File.basename(file_path))
      if upload && upload.errors.empty?
        filename = name || file_name || File.basename(file_path)
        html_for_upload(upload, normalize_text(filename))
      else
        puts "Error: Upload did not persist for #{post.id} #{attachment_id}!"
        s
      end
    end

    new_raw = new_raw.gsub(cdn_regex) do |s|
      # first capture: the path below "uploads/"
      attachment_id = cdn_regex.match(s)[1]
      file_path = "#{ATTACHMENTS_BASE_DIR}/#{attachment_id}"

      unless File.exist?(file_path)
        puts "Couldn't find file for #{attachment_id}. Skipping."
        next s
      end

      upload = create_upload(post.user.id, file_path, File.basename(file_path))
      if upload && upload.errors.empty?
        upload.url
      else
        puts "Error: Upload did not persist for #{post.id} #{attachment_id}!"
        s
      end
    end

    next if new_raw == post.raw

    begin
      PostRevisor.new(post).revise!(post.user, { raw: new_raw }, skip_revision: true, skip_validations: true, bypass_bump: true)
    rescue
      # fall back to writing the raw directly when the revisor fails
      puts "PostRevisor error for #{post.id}"
      post.raw = new_raw
      post.save(validate: false)
    end
  end
end
# Marks topics as solved from the accepted answers recorded by import_posts.
#
# For every post custom field is_accepted_answer = 't', inserts an
# accepted_answer_post_id topic custom field pointing at that post, unless the
# topic already has one. Running it again inserts nothing new.
def mark_topics_as_solved
  puts "", "Marking topics as solved..."

  DB.exec <<~SQL
    INSERT INTO topic_custom_fields (name, value, topic_id, created_at, updated_at)
    SELECT 'accepted_answer_post_id', pcf.post_id, p.topic_id, p.created_at, p.created_at
    FROM post_custom_fields pcf
    JOIN posts p ON p.id = pcf.post_id
    WHERE pcf.name = 'is_accepted_answer' AND pcf.value = 't'
      AND NOT EXISTS (
        SELECT 1
        FROM topic_custom_fields x
        WHERE x.topic_id = p.topic_id AND x.name = 'accepted_answer_post_id'
      )
    ON CONFLICT DO NOTHING
  SQL
end
2015-06-29 20:45:52 +08:00
end
# Script entry point: instantiate the importer and call perform on it.
ImportScripts::VanillaSQL.new.perform