# frozen_string_literal: true
# Notes:
#
# Written by Sam
#
# Lithium is quite protective of its data; there is no simple way of exporting it.
# If you have leverage you may get a data dump; in my case it was provided in XML
# format.
#
# First step is to convert it to db format so you can import it into a DB
# that was done using import_scripts/support/convert_mysql_xml_to_mysql.rb
#
require "mysql2"
require "csv"
require "reverse_markdown"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
require "htmlentities"
2015-07-20 15:25:27 +08:00
# remove table conversion: unregister ReverseMarkdown's converters for every table-related tag
%i[table td tr th thead tbody].each { |tag| ReverseMarkdown::Converters.unregister(tag) }
2015-07-14 13:53:38 +08:00
class ImportScripts :: Lithium < ImportScripts :: Base
# Number of rows fetched per LIMIT/OFFSET page by the batched import queries.
BATCH_SIZE = 1000

# CHANGE THESE BEFORE RUNNING THE IMPORTER
# MySQL schema name and root password used by #initialize.
DATABASE = "wd"
PASSWORD = "password"
# Directory searched for "<sso_id>_actual.jpeg" / "<sso_id>_profile.jpeg" avatar files.
AVATAR_DIR = "/tmp/avatars"
# Directory holding the exported attachment ".dat" files (see #find_upload).
ATTACHMENT_DIR = "/tmp/attachments"
UPLOAD_DIR = "/tmp/uploads"

OLD_DOMAIN = "community.wd.com"

# Extra SQL appended verbatim to the WHERE clause of the topic and post queries.
TEMP = ""

# Maps Discourse custom field names to their source: a column of the `users`
# row (:user) or a `profile.<name>` row of `user_profile` (:profile).
USER_CUSTOM_FIELDS = [
  { name: "sso_id", user: "sso_id" },
  { name: "user_field_1", profile: "jobtitle" },
  { name: "user_field_2", profile: "company" },
  { name: "user_field_3", profile: "industry" },
]

# Quoted, comma-separated list interpolated into the `param IN (...)` profile filter.
LITHIUM_PROFILE_FIELDS =
  "'profile.jobtitle', 'profile.company', 'profile.industry', 'profile.location'"

# Lithium login_canon => username to use instead on import.
USERNAME_MAPPINGS = { admins: "admin_user" }.with_indifferent_access
2015-07-14 13:53:38 +08:00
# Initialises importer state and opens the MySQL connection to the Lithium
# dump (localhost, user "root", credentials from PASSWORD / DATABASE).
def initialize
  super

  # Lithium login_canon => username actually assigned on creation.
  @old_username_to_new_usernames = {}
  @htmlentities = HTMLEntities.new

  @client =
    Mysql2::Client.new(
      host: "localhost",
      username: "root",
      password: PASSWORD,
      database: DATABASE,
    )
end
# Runs every import step in dependency order, then post-processes the posts
# created during this run.
def execute
  # Highest post id before the import; post-processing only touches posts with
  # a larger id. Falls back to 0 so an empty posts table does not turn the
  # later `id > ?` filter into a comparison with NULL (which matches nothing).
  @max_start_id = Post.maximum(:id) || 0

  import_groups
  import_categories
  import_users
  import_user_visits
  import_topics
  import_posts
  import_likes
  import_accepted_answers
  import_pms
  close_topics
  create_permalinks

  post_process_posts
end
# Creates one group per distinct Lithium role name. The raw role name is used
# as the import id; the group name is the HTML-entity-decoded, stripped name.
def import_groups
  puts "", "importing groups..."

  groups = mysql_query <<-SQL
      SELECT DISTINCT name
        FROM roles
    ORDER BY name
  SQL

  create_groups(groups) do |group|
    { id: group["name"], name: @htmlentities.decode(group["name"]).strip }
  end
end
# Imports users in batches of BATCH_SIZE, together with their location,
# custom fields (see USER_CUSTOM_FIELDS) and, when a matching file exists in
# AVATAR_DIR, their avatar.
#
# Emails shared by several Lithium accounts are made unique: the first account
# keeps the address, later ones get "+<n>" inserted before the "@".
def import_users
  puts "", "importing users"

  user_count = mysql_query("SELECT COUNT(*) count FROM users").first["count"]

  # email_lower => number of accounts seen so far with that address.
  duplicate_emails =
    mysql_query(
      "SELECT email_lower FROM users GROUP BY email_lower HAVING COUNT(email_lower) > 1",
    ).map { |e| [e["email_lower"], 0] }.to_h

  batches(BATCH_SIZE) do |offset|
    users = mysql_query <<-SQL
        SELECT id, nlogin, login_canon, email, registration_time, sso_id
          FROM users
      ORDER BY id
         LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
    SQL

    break if users.size < 1

    next if all_records_exist? :users, users.map { |u| u["id"].to_i }

    users = users.to_a
    first_id = users.first["id"]
    last_id = users.last["id"]

    profiles = mysql_query <<-SQL
        SELECT user_id, param, nvalue
          FROM user_profile
         WHERE nvalue IS NOT NULL AND param IN (#{LITHIUM_PROFILE_FIELDS}) AND user_id >= #{first_id} AND user_id <= #{last_id}
      ORDER BY user_id
    SQL

    # Index the batch's profile rows once instead of scanning them per user.
    profiles_by_user = profiles.group_by { |p| p["user_id"] }

    create_users(users, total: user_count, offset: offset) do |user|
      user_id = user["id"]
      profile = profiles_by_user.fetch(user_id, [])
      location_row = profile.find { |p| p["param"] == "profile.location" }
      location = location_row && location_row["nvalue"]

      username = user["login_canon"]
      username = USERNAME_MAPPINGS[username] if USERNAME_MAPPINGS[username].present?

      email = user["email"].presence || fake_email
      email_lower = email.downcase
      if duplicate_emails.key?(email_lower)
        duplicate_emails[email_lower] += 1
        if duplicate_emails[email_lower] > 1
          email = email.sub("@", "+#{duplicate_emails[email_lower]}@")
        end
      end

      {
        id: user_id,
        name: user["nlogin"],
        username: username,
        email: email,
        location: location,
        custom_fields: user_custom_fields(user, profile),
        # website: user["homepage"].strip,
        # title: @htmlentities.decode(user["usertitle"]).strip,
        # primary_group_id: group_id_from_imported_group_id(user["usergroupid"]),
        created_at: unix_time(user["registration_time"]),
        post_create_action:
          proc do |u|
            @old_username_to_new_usernames[user["login_canon"]] = u.username

            # import user avatar
            sso_id = u.custom_fields["sso_id"]
            if sso_id.present?
              prefix = "#{AVATAR_DIR}/#{sso_id}_"
              file = get_file(prefix + "actual.jpeg")
              file ||= get_file(prefix + "profile.jpeg")

              if file
                begin
                  upload = UploadCreator.new(file, file.path, type: "avatar").create_for(u.id)
                  u.create_user_avatar unless u.user_avatar

                  if !u.user_avatar.contains_upload?(upload.id)
                    u.user_avatar.update_columns(custom_upload_id: upload.id)

                    if u.uploaded_avatar_id.nil? ||
                         !u.user_avatar.contains_upload?(u.uploaded_avatar_id)
                      u.update_columns(uploaded_avatar_id: upload.id)
                    end
                  end
                ensure
                  # get_file hands back an open handle; release it per user.
                  file.close
                end
              end
            end
          end,
      }
    end
  end
end
2018-06-18 23:08:57 +08:00
# Replays Lithium login records as Discourse visit records.
#
# Rows of `user_log` are read in batches of BATCH_SIZE and grouped by user id;
# each login_time becomes a call to `update_visit_record!` on the imported user.
#
# Raises RuntimeError when a logged user id has no imported counterpart.
def import_user_visits
  puts "", "importing user visits"

  batches(BATCH_SIZE) do |offset|
    visits = mysql_query <<-SQL
        SELECT user_id, login_time
          FROM user_log
      ORDER BY user_id
         LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
    SQL

    break if visits.size < 1

    # Group by the user id itself. (`uniq { ... }` would yield whole rows, so
    # the lookup and the comparison below would be made against a Hash.)
    visits
      .group_by { |v| v["user_id"] }
      .each do |user_id, user_visits|
        user = UserCustomField.find_by(name: "import_id", value: user_id.to_s).try(:user)
        raise "User not found for id #{user_id}" if user.blank?

        user_visits.each { |v| user.update_visit_record!(unix_time(v["login_time"])) }
      end
  end
end
2018-05-18 19:41:20 +08:00
# Builds the custom-field hash for one user.
#
# user    - row of the `users` table (Hash keyed by column name).
# profile - that user's `user_profile` rows (each with "param" and "nvalue").
#
# For every USER_CUSTOM_FIELDS entry the value comes from the user row (:user)
# or from the matching "profile.<name>" row (:profile). Profile-backed fields
# with no matching row are left out of the result.
#
# Returns a Hash of custom field name => value.
def user_custom_fields(user, profile)
  USER_CUSTOM_FIELDS.each_with_object({}) do |attr, fields|
    name = attr[:name]

    if attr[:user].present?
      fields[name] = user[attr[:user]]
    elsif attr[:profile].present? && profile.count > 0
      row = profile.find { |p| p["param"] == "profile.#{attr[:profile]}" }
      fields[name] = row["nvalue"] if row
    end
  end
end
# Opens the file at +path+.
#
# Returns the open File, or nil when nothing exists at +path+. The handle is
# not closed here; that is left to the caller.
def get_file(path)
  File.exist?(path) ? File.open(path) : nil
end
2015-07-14 13:53:38 +08:00
# Converts a timestamp in milliseconds since the Unix epoch into a Time.
def unix_time(t)
  Time.at(t / 1000.0)
end
# Imports the most recent custom avatar stored for +old_user+ in the
# `customavatar` table and attaches it to +imported_user+.
#
# Does nothing when there is no avatar row or the upload is not persisted.
# The temporary file is always closed and removed.
def import_profile_picture(old_user, imported_user)
  query = mysql_query <<-SQL
      SELECT filedata, filename
        FROM customavatar
       WHERE userid = #{old_user["userid"]}
    ORDER BY dateline DESC
       LIMIT 1
  SQL

  picture = query.first

  return if picture.nil?

  file = Tempfile.new("profile-picture")
  file.write(picture["filedata"].encode("ASCII-8BIT").force_encoding("UTF-8"))
  file.rewind

  upload = UploadCreator.new(file, picture["filename"]).create_for(imported_user.id)

  return if !upload.persisted?

  imported_user.create_user_avatar
  imported_user.user_avatar.update(custom_upload_id: upload.id)
  imported_user.update(uploaded_avatar_id: upload.id)
ensure
  # `file` is still nil when we returned before creating the Tempfile.
  # Cleanup is best-effort: a failure here must not mask the original outcome.
  begin
    file&.close
  rescue StandardError
    nil
  end
  begin
    file&.unlink
  rescue StandardError
    nil
  end
end
# Imports the most recent row stored for +old_user+ in the `customprofilepic`
# table and sets it as the profile background of +imported_user+.
#
# Does nothing when there is no row or the upload is not persisted.
# The temporary file is always closed and removed.
def import_profile_background(old_user, imported_user)
  query = mysql_query <<-SQL
      SELECT filedata, filename
        FROM customprofilepic
       WHERE userid = #{old_user["userid"]}
    ORDER BY dateline DESC
       LIMIT 1
  SQL

  background = query.first

  return if background.nil?

  file = Tempfile.new("profile-background")
  file.write(background["filedata"].encode("ASCII-8BIT").force_encoding("UTF-8"))
  file.rewind

  upload = UploadCreator.new(file, background["filename"]).create_for(imported_user.id)

  return if !upload.persisted?

  imported_user.user_profile.upload_profile_background(upload)
ensure
  # `file` is still nil when we returned before creating the Tempfile.
  # Cleanup is best-effort: a failure here must not mask the original outcome.
  begin
    file&.close
  rescue StandardError
    nil
  end
  begin
    file&.unlink
  rescue StandardError
    nil
  end
end
# Imports Lithium nodes as categories in two passes: nodes whose
# parent_node_id is <= 2 become top-level categories, all others become
# children of their (already imported) parent.
#
# A node's name is its category title, else its board title, else its
# display_id. Names used by several nodes get " (<node_id>)" appended.
def import_categories
  puts "", "importing top level categories..."

  categories = mysql_query <<-SQL
      SELECT n.node_id, n.display_id, c.nvalue c_title, b.nvalue b_title, n.position, n.parent_node_id, n.type_id
        FROM nodes n
   LEFT JOIN settings c ON n.node_id = c.node_id AND c.param = 'category.title'
   LEFT JOIN settings b ON n.node_id = b.node_id AND b.param = 'board.title'
    ORDER BY n.type_id DESC, n.node_id ASC
  SQL

  # Keep every row, even one without any usable name, so the steps below never
  # receive a nil entry.
  categories =
    categories.map do |c|
      c["name"] = c["c_title"] || c["b_title"] || c["display_id"]
      c
    end

  # To prevent duplicate category names.
  # Names are suffixed in place while iterating, so the count shrinks as
  # earlier duplicates are renamed and the last holder of a name keeps it.
  categories.each do |category|
    count = categories.count { |c| c["name"].present? && c["name"] == category["name"] }
    category["name"] << " (#{category["node_id"]})" if count > 1
  end

  parent_categories = categories.select { |c| c["parent_node_id"] <= 2 }

  create_categories(parent_categories) do |category|
    {
      id: category["node_id"],
      name: category["name"],
      position: category["position"],
      post_create_action: lambda { |record| after_category_create(record, category) },
    }
  end

  puts "", "importing children categories..."

  children_categories = categories.select { |c| c["parent_node_id"] > 2 }

  create_categories(children_categories) do |category|
    {
      id: category["node_id"],
      name: category["name"],
      position: category["position"],
      parent_category_id: category_id_from_imported_category_id(category["parent_node_id"]),
      post_create_action: lambda { |record| after_category_create(record, category) },
    }
  end
end
2015-07-28 15:46:00 +08:00
2018-05-18 04:04:31 +08:00
# Post-create hook for an imported category: when Lithium roles are attached
# to the category's node, the category is made read-restricted and every role
# with an imported group is granted full permission on it.
#
# category - the newly created category record.
# params   - the source row for the category (currently unused).
def after_category_create(category, params)
  node_id = category.custom_fields["import_id"]
  roles = mysql_query <<-SQL
    SELECT name
      FROM roles
     WHERE node_id = #{node_id}
  SQL

  return if roles.count < 1

  category.update(read_restricted: true)

  roles.each do |role|
    group_id = group_id_from_imported_group_id(role["name"])

    if group_id.present?
      CategoryGroup.find_or_create_by(category: category, group_id: group_id) do |cg|
        cg.permission_type = CategoryGroup.permission_types[:full]
      end
    else
      puts "", "Group not found for id '#{role["name"]}'"
    end
  end
end
2018-05-19 13:54:48 +08:00
# Memoised Guardian for the Discourse system user, used when tagging topics.
def staff_guardian
  @_staff_guardian ||= Guardian.new(Discourse.system_user)
end
2015-07-14 13:53:38 +08:00
# Imports root messages (id = root_id) as topics, in batches of BATCH_SIZE,
# and tags each created topic with its Lithium labels.
#
# Tagging is enabled and the tag limits are raised for the duration of the
# import; the previous limits are restored afterwards, even when the import
# raises. Rows without a known category or without a body are skipped and the
# reason is recorded in PluginStoreRow under "topic_import_log".
def import_topics
  puts "", "importing topics..."

  SiteSetting.tagging_enabled = true
  default_max_tags_per_topic = SiteSetting.max_tags_per_topic
  default_max_tag_length = SiteSetting.max_tag_length
  SiteSetting.max_tags_per_topic = 10
  SiteSetting.max_tag_length = 100

  begin
    topic_count =
      mysql_query("SELECT COUNT(*) count FROM message2 where id = root_id").first["count"]

    # target_id => comma-separated tag list, indexed once instead of scanning
    # the whole result for every topic.
    tags_by_target = {}
    mysql_query(
      "SELECT e.target_id, GROUP_CONCAT(l.tag_text SEPARATOR ',') tags FROM tag_events_label_message e LEFT JOIN tags_label l ON e.tag_id = l.tag_id GROUP BY e.target_id",
    ).each { |row| tags_by_target[row["target_id"]] ||= row["tags"] }

    batches(BATCH_SIZE) do |offset|
      topics = mysql_query <<-SQL
          SELECT id, subject, body, deleted, user_id,
                 post_date, views, node_id, unique_id, row_version
            FROM message2
           WHERE id = root_id #{TEMP}
        ORDER BY node_id, id
           LIMIT #{BATCH_SIZE}
          OFFSET #{offset}
      SQL

      break if topics.size < 1

      next if all_records_exist? :posts, topics.map { |topic| "#{topic["node_id"]} #{topic["id"]}" }

      create_posts(topics, total: topic_count, offset: offset) do |topic|
        category_id = category_id_from_imported_category_id(topic["node_id"])
        # NOTE(review): row_version is passed through unconverted — confirm it is
        # already a timestamp rather than epoch milliseconds.
        deleted_at = topic["deleted"] == 1 ? topic["row_version"] : nil
        raw = topic["body"]

        if category_id.present? && raw.present?
          {
            id: "#{topic["node_id"]} #{topic["id"]}",
            user_id: user_id_from_imported_user_id(topic["user_id"]) || Discourse::SYSTEM_USER_ID,
            title: @htmlentities.decode(topic["subject"]).strip[0...255],
            category: category_id,
            raw: raw,
            created_at: unix_time(topic["post_date"]),
            deleted_at: deleted_at,
            views: topic["views"],
            custom_fields: {
              import_unique_id: topic["unique_id"],
            },
            import_mode: true,
            post_create_action:
              proc do |post|
                tags = tags_by_target[topic["unique_id"]]
                # tags is nil when the target has no tag text (LEFT JOIN miss).
                if tags.present?
                  DiscourseTagging.tag_topic_by_names(post.topic, staff_guardian, tags.split(","))
                end
              end,
          }
        else
          message = "Unknown"
          # category_id is blank on this path, so report the source node id.
          message = "Category '#{topic["node_id"]}' not exist" if category_id.blank?
          message = "Topic 'body' is empty" if raw.blank?
          PluginStoreRow.find_or_create_by(
            plugin_name: "topic_import_log",
            key: topic["unique_id"].to_s,
            value: message,
            type_name: "String",
          )
          nil
        end
      end
    end
  ensure
    SiteSetting.max_tags_per_topic = default_max_tags_per_topic
    SiteSetting.max_tag_length = default_max_tag_length
  end
end
# Imports non-root messages (id <> root_id) as replies, in batches of
# BATCH_SIZE.
#
# A reply is skipped when its topic was not imported. Replies with an empty
# body are skipped and recorded in PluginStoreRow under "post_import_log".
# When the parent message was imported, the reply is linked to it through
# reply_to_post_number.
def import_posts
  post_count =
    mysql_query("SELECT COUNT(*) count FROM message2 WHERE id <> root_id").first["count"]

  puts "", "importing posts... (#{post_count})"

  batches(BATCH_SIZE) do |offset|
    posts = mysql_query <<-SQL
        SELECT id, body, deleted, user_id,
               post_date, parent_id, root_id, node_id, unique_id, row_version
          FROM message2
         WHERE id <> root_id #{TEMP}
      ORDER BY node_id, root_id, id
         LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
    SQL

    break if posts.size < 1

    import_ids = posts.map { |post| "#{post["node_id"]} #{post["root_id"]} #{post["id"]}" }
    next if all_records_exist? :posts, import_ids

    create_posts(posts, total: post_count, offset: offset) do |post|
      topic = topic_lookup_from_imported_post_id("#{post["node_id"]} #{post["root_id"]}")
      next unless topic

      # Deletion state belongs to the message row being imported; `topic` is the
      # lookup result for the already-imported topic and has no such columns.
      deleted_at = post["deleted"] == 1 ? post["row_version"] : nil
      raw = post["body"]

      if raw.present?
        new_post = {
          id: "#{post["node_id"]} #{post["root_id"]} #{post["id"]}",
          user_id: user_id_from_imported_user_id(post["user_id"]) || Discourse::SYSTEM_USER_ID,
          topic_id: topic[:topic_id],
          raw: raw,
          created_at: unix_time(post["post_date"]),
          deleted_at: deleted_at,
          custom_fields: {
            import_unique_id: post["unique_id"],
          },
          import_mode: true,
        }

        parent =
          topic_lookup_from_imported_post_id(
            "#{post["node_id"]} #{post["root_id"]} #{post["parent_id"]}",
          )
        new_post[:reply_to_post_number] = parent[:post_number] if parent

        new_post
      else
        PluginStoreRow.find_or_create_by(
          plugin_name: "post_import_log",
          key: post["unique_id"].to_s,
          value: "Post 'body' is empty",
          type_name: "String",
        )
        nil
      end
    end
  end
end
2015-07-28 15:46:00 +08:00
# Lithium smiley identifier => emoji name used as its replacement.
SMILEY_SUBS = {
  "smileyhappy" => "smiley",
  "smileyindifferent" => "neutral_face",
  "smileymad" => "angry",
  "smileysad" => "cry",
  "smileysurprised" => "dizzy_face",
  "smileytongue" => "stuck_out_tongue",
  "smileyvery-happy" => "grin",
  "smileywink" => "wink",
  "smileyfrustrated" => "confounded",
  "smileyembarrassed" => "flushed",
  "smileylol" => "laughing",
  "cathappy" => "smiley_cat",
  "catindifferent" => "cat",
  "catmad" => "smirk_cat",
  "catsad" => "crying_cat_face",
  "catsurprised" => "scream_cat",
  "cattongue" => "stuck_out_tongue",
  "catvery-happy" => "smile_cat",
  "catwink" => "wink",
  "catfrustrated" => "grumpycat",
  "catembarrassed" => "kissing_cat",
  "catlol" => "joy_cat",
}
2015-07-16 11:08:59 +08:00
# Imports Lithium score events as likes.
#
# Events are mapped to imported users and posts, staged in a temporary table,
# and inserted in bulk as post_actions (type 2) plus the two matching
# user_actions rows (types 1 and 2); only missing rows are inserted. The like
# counts on posts and topics are recomputed at the end.
def import_likes
  puts "\nimporting likes..."

  sql =
    "select source_id user_id, target_id post_id, row_version created_at from tag_events_score_message"
  results = mysql_query(sql)

  puts "loading unique id map"
  # import_unique_id (stored as a string) => Discourse post id
  existing_map = {}
  PostCustomField
    .where(name: "import_unique_id")
    .pluck(:post_id, :value)
    .each { |post_id, import_id| existing_map[import_id] = post_id }

  puts "loading data into temp table"
  DB.exec(
    "create temp table like_data(user_id int, post_id int, created_at timestamp without time zone)",
  )
  PostAction.transaction do
    results.each do |result|
      liker_id = user_id_from_imported_user_id(result["user_id"].to_s)
      liked_post_id = existing_map[result["post_id"].to_s]

      # Skip likes whose user or post was not imported.
      next unless liker_id && liked_post_id

      DB.exec(
        "INSERT INTO like_data VALUES (:user_id,:post_id,:created_at)",
        user_id: liker_id,
        post_id: liked_post_id,
        created_at: result["created_at"],
      )
    end
  end

  puts "creating missing post actions"
  DB.exec <<~SQL
    INSERT INTO post_actions (post_id, user_id, post_action_type_id, created_at, updated_at)
         SELECT l.post_id, l.user_id, 2, l.created_at, l.created_at FROM like_data l
      LEFT JOIN post_actions a ON a.post_id = l.post_id AND l.user_id = a.user_id AND a.post_action_type_id = 2
          WHERE a.id IS NULL
  SQL

  puts "creating missing user actions"
  DB.exec <<~SQL
    INSERT INTO user_actions (user_id, action_type, target_topic_id, target_post_id, acting_user_id, created_at, updated_at)
         SELECT pa.user_id, 1, p.topic_id, p.id, pa.user_id, pa.created_at, pa.created_at
           FROM post_actions pa
           JOIN posts p ON p.id = pa.post_id
      LEFT JOIN user_actions ua ON action_type = 1 AND ua.target_post_id = pa.post_id AND ua.user_id = pa.user_id
          WHERE ua.id IS NULL AND pa.post_action_type_id = 2
  SQL

  # reverse action
  DB.exec <<~SQL
    INSERT INTO user_actions (user_id, action_type, target_topic_id, target_post_id, acting_user_id, created_at, updated_at)
         SELECT p.user_id, 2, p.topic_id, p.id, pa.user_id, pa.created_at, pa.created_at
           FROM post_actions pa
           JOIN posts p ON p.id = pa.post_id
      LEFT JOIN user_actions ua ON action_type = 2 AND ua.target_post_id = pa.post_id AND
                ua.acting_user_id = pa.user_id AND ua.user_id = p.user_id
          WHERE ua.id IS NULL AND pa.post_action_type_id = 2
  SQL

  puts "updating like counts on posts"
  DB.exec <<~SQL
    UPDATE posts SET like_count = coalesce(cnt, 0)
      FROM (
        SELECT post_id, count(*) cnt
          FROM post_actions
         WHERE post_action_type_id = 2 AND deleted_at IS NULL
      GROUP BY post_id
      ) x
     WHERE posts.like_count <> x.cnt AND posts.id = x.post_id
  SQL

  puts "updating like counts on topics"
  DB.exec <<-SQL
    UPDATE topics SET like_count = coalesce(cnt, 0)
      FROM (
        SELECT topic_id, sum(like_count) cnt
          FROM posts
         WHERE deleted_at IS NULL
      GROUP BY topic_id
      ) x
     WHERE topics.like_count <> x.cnt AND topics.id = x.topic_id
  SQL
end
2015-07-17 10:41:45 +08:00
# Marks accepted answers: non-deleted messages with attribute bit 0x4000 set.
#
# The matching imported posts are staged in a temporary table, reduced to one
# post per topic (the lowest post id), and then written as the
# 'is_accepted_answer' post custom field and the 'accepted_answer_post_id'
# topic custom field. Only missing rows are inserted.
def import_accepted_answers
  puts "\nimporting accepted answers..."

  sql =
    "select unique_id post_id from message2 where (attributes & 0x4000 ) != 0 and deleted = 0;"
  results = mysql_query(sql)

  puts "loading unique id map"
  # import_unique_id (stored as a string) => Discourse post id
  existing_map = {}
  PostCustomField
    .where(name: "import_unique_id")
    .pluck(:post_id, :value)
    .each { |post_id, import_id| existing_map[import_id] = post_id }

  puts "loading data into temp table"
  DB.exec("create temp table accepted_data(post_id int primary key)")
  PostAction.transaction do
    results.each do |result|
      accepted_post_id = existing_map[result["post_id"].to_s]
      next unless accepted_post_id

      DB.exec("INSERT INTO accepted_data VALUES (:post_id)", post_id: accepted_post_id)
    end
  end

  puts "deleting dupe answers"
  DB.exec <<~SQL
    DELETE FROM accepted_data WHERE post_id NOT IN (
      SELECT post_id FROM
      (
        SELECT topic_id, MIN(post_id) post_id
          FROM accepted_data a
          JOIN posts p ON p.id = a.post_id
      GROUP BY topic_id
      ) X
    )
  SQL

  puts "importing accepted answers"
  DB.exec <<~SQL
    INSERT into post_custom_fields (name, value, post_id, created_at, updated_at)
         SELECT 'is_accepted_answer', 'true', a.post_id, current_timestamp, current_timestamp
           FROM accepted_data a
      LEFT JOIN post_custom_fields f ON name = 'is_accepted_answer' AND f.post_id = a.post_id
          WHERE f.id IS NULL
  SQL

  puts "marking accepted topics"
  DB.exec <<~SQL
    INSERT into topic_custom_fields (name, value, topic_id, created_at, updated_at)
         SELECT 'accepted_answer_post_id', a.post_id::varchar, p.topic_id, current_timestamp, current_timestamp
           FROM accepted_data a
           JOIN posts p ON p.id = a.post_id
      LEFT JOIN topic_custom_fields f ON name = 'accepted_answer_post_id' AND f.topic_id = p.topic_id
          WHERE f.id IS NULL
  SQL

  puts "done importing accepted answers"
end
2015-07-28 15:46:00 +08:00
# Imports Lithium notes as private messages.
#
# Participants of a note are its inbox/outbox recipients plus the sender.
# A note whose subject starts with "Re: " is appended to the topic created for
# the first note that has the same participants and the subject without that
# prefix; every other note starts a new private-message topic. Notes with an
# empty body are skipped and recorded in PluginStoreRow under "pm_import_log".
def import_pms
  puts "", "importing pms..."

  puts "determining participation records"

  inbox = mysql_query("SELECT note_id, recipient_user_id user_id FROM tblia_notes_inbox")
  outbox = mysql_query("SELECT note_id, recipient_id user_id FROM tblia_notes_outbox")

  # note_id => Set of imported user ids taking part in the note
  users = {}

  [inbox, outbox].each do |r|
    r.each do |row|
      ary = (users[row["note_id"]] ||= Set.new)
      user_id = user_id_from_imported_user_id(row["user_id"])
      ary << user_id if user_id
    end
  end

  puts "untangling PM soup"

  # [subject, participants] => note_id of the first note with that pair
  subject_to_first_note = {}

  mysql_query(
    "SELECT note_id, subject, sender_user_id FROM tblia_notes_content order by note_id",
  ).each do |row|
    user_id = user_id_from_imported_user_id(row["sender_user_id"])
    ary = (users[row["note_id"]] ||= Set.new)
    ary << user_id if user_id
    subject_to_first_note[[row["subject"], ary]] ||= row["note_id"] if row["subject"] !~ /^Re: /
  end

  puts "Loading user_id to username map"
  user_map = {}
  User.pluck(:id, :username).each { |id, username| user_map[id] = username }

  topic_count = mysql_query("SELECT COUNT(*) count FROM tblia_notes_content").first["count"]

  batches(BATCH_SIZE) do |offset|
    topics = mysql_query <<-SQL
        SELECT note_id, subject, body, sender_user_id, sent_time
          FROM tblia_notes_content
      ORDER BY note_id
         LIMIT #{BATCH_SIZE}
        OFFSET #{offset}
    SQL

    break if topics.size < 1

    next if all_records_exist? :posts, topics.map { |topic| "pm_#{topic["note_id"]}" }

    create_posts(topics, total: topic_count, offset: offset) do |topic|
      user_id =
        user_id_from_imported_user_id(topic["sender_user_id"]) || Discourse::SYSTEM_USER_ID
      participants = users[topic["note_id"]]

      usernames = (participants - [user_id]).map { |id| user_map[id] }

      subject = topic["subject"]
      topic_id = nil

      if subject =~ /^Re: /
        # subject[4..-1] drops the "Re: " prefix.
        parent_id = subject_to_first_note[[subject[4..-1], participants]]
        if parent_id
          t = topic_lookup_from_imported_post_id("pm_#{parent_id}")
          topic_id = t[:topic_id] if t
        end
      end

      raw = topic["body"]

      if raw.present?
        msg = {
          id: "pm_#{topic["note_id"]}",
          user_id: user_id,
          raw: raw,
          created_at: unix_time(topic["sent_time"]),
          import_mode: true,
        }

        if topic_id
          msg[:topic_id] = topic_id
        else
          msg[:title] = @htmlentities.decode(topic["subject"]).strip[0...255]
          msg[:archetype] = Archetype.private_message
          msg[:target_usernames] = usernames.join(",")
        end

        msg
      else
        PluginStoreRow.find_or_create_by(
          plugin_name: "pm_import_log",
          key: topic["note_id"].to_s,
          value: "PM 'body' is empty",
          type_name: "String",
        )
        nil
      end
    end
  end
end
# Closes the imported topics whose root message has attribute bit 0x0002 set.
# Topics are updated in slices of 500 source ids.
def close_topics
  puts "\nclosing closed topics..."

  sql =
    "select unique_id post_id from message2 where root_id = id AND (attributes & 0x0002 ) != 0;"
  results = mysql_query(sql)

  # loading post map: import_unique_id (as integer) => Discourse post id
  existing_map = {}
  PostCustomField
    .where(name: "import_unique_id")
    .pluck(:post_id, :value)
    .each { |post_id, import_id| existing_map[import_id.to_i] = post_id.to_i }

  results
    .map { |r| r["post_id"] }
    .each_slice(500) do |ids|
      mapped = ids.map { |id| existing_map[id] }.compact
      DB.exec(<<~SQL, ids: mapped) if mapped.present?
        UPDATE topics SET closed = true
        WHERE id IN (SELECT topic_id FROM posts where id in (:ids))
      SQL
    end
end
2015-07-22 11:40:45 +08:00
# Creates "/p/<import_unique_id>" permalinks for every imported post that does
# not have one yet: first posts link to their topic, all other posts link to
# the post itself. Also sets the permalink normalization that rewrites
# "/t5/...p/<id>..." URLs to "/p/<id>".
def create_permalinks
  puts "Creating permalinks"

  SiteSetting.permalink_normalizations = '/t5\\/.*p\\/(\\d+).*//p/\\1'

  sql = <<-SQL
    INSERT INTO permalinks (url, topic_id, created_at, updated_at)
         SELECT '/p/' || value, p.topic_id, current_timestamp, current_timestamp
           FROM post_custom_fields f
           JOIN posts p on f.post_id = p.id AND post_number = 1
      LEFT JOIN permalinks pm ON url = '/p/' || value
          WHERE pm.id IS NULL AND f.name = 'import_unique_id'
  SQL

  r = DB.exec sql
  puts "#{r} permalinks to topics added!"

  sql = <<-SQL
    INSERT INTO permalinks (url, post_id, created_at, updated_at)
         SELECT '/p/' || value, p.id, current_timestamp, current_timestamp
           FROM post_custom_fields f
           JOIN posts p on f.post_id = p.id AND post_number <> 1
      LEFT JOIN permalinks pm ON url = '/p/' || value
          WHERE pm.id IS NULL AND f.name = 'import_unique_id'
  SQL

  r = DB.exec sql
  puts "#{r} permalinks to posts added!"
end
2018-05-21 16:04:52 +08:00
# Creates an upload from an exported Lithium attachment.
#
# The attachment is read from
# ATTACHMENT_DIR/000<d>/<id>.dat, where <id> is +attachment_id+ left-padded
# with zeros to four characters and <d> is the first character of <id>.
#
# user_id       - owner of the created upload.
# attachment_id - Lithium attachment id.
# real_filename - original file name; when it starts with "." a random hex
#                 prefix is prepended to it in place.
#
# Returns [upload, real_filename], or nil when the file is missing or the
# upload is invalid.
def find_upload(user_id, attachment_id, real_filename)
  filename = attachment_id.to_s.rjust(4, "0")
  filename = File.join(ATTACHMENT_DIR, "000#{filename[0]}/#{filename}.dat")

  unless File.exist?(filename)
    puts "Attachment file doesn't exist: #{filename}"
    return nil
  end

  real_filename.prepend SecureRandom.hex if real_filename[0] == "."
  upload = create_upload(user_id, filename, real_filename)

  if upload.nil? || !upload.valid?
    puts "Upload not valid :("
    puts upload.errors.inspect if upload
    return nil
  end

  [upload, real_filename]
end
# Second pass over the imported posts: re-reads each post's original body
# from the Lithium database, converts it with postprocess_post_raw, appends
# the HTML for its attachments and, when the result differs from the stored
# raw, re-cooks and saves the post.
#
# While this runs, authorized_extensions is set to "*" and
# max_attachment_size_kb to 307_200; both settings are put back in an
# +ensure+, so they are restored even when the pass aborts with an error.
#
# Only posts with id > @max_start_id are visited. Posts without an
# "import_unique_id" custom field, or whose body cannot be found, are skipped.
# A PrettyText::JavaScriptError on a single post is reported and the pass
# continues; any other error propagates to the caller.
def post_process_posts
  puts "", "Postprocessing posts..."

  default_extensions = SiteSetting.authorized_extensions
  default_max_att_size = SiteSetting.max_attachment_size_kb
  SiteSetting.authorized_extensions = "*"
  SiteSetting.max_attachment_size_kb = 307_200

  begin
    # Best effort: the index may already exist from an earlier run.
    begin
      mysql_query("create index idxUniqueId on message2(unique_id)")
    rescue StandardError
      nil
    end

    # Index the attachment rows by message uid once, instead of scanning the
    # whole result set again for every post.
    attachments_by_message =
      mysql_query(
        "SELECT a.attachment_id, a.file_name, m.message_uid FROM tblia_attachment a INNER JOIN tblia_message_attachments m ON a.attachment_id = m.attachment_id",
      ).group_by { |a| a["message_uid"].to_s }

    posts = Post.where("id > ?", @max_start_id)

    current = 0
    # Count the same scope that is iterated so the progress total is reachable.
    max = posts.count

    posts.find_each do |post|
      begin
        id = post.custom_fields["import_unique_id"]
        next unless id

        # The id is escaped so a quote in it cannot break the statement.
        row =
          mysql_query(
            "select body from message2 where unique_id = '#{@client.escape(id.to_s)}'",
          ).first
        raw = row && row["body"]
        unless raw
          puts "Missing raw for post: #{post.id}"
          next
        end

        new_raw = postprocess_post_raw(raw, post.user_id)
        files = attachments_by_message.fetch(id, [])
        new_raw << html_for_attachments(post.user_id, files)

        unless post.raw == new_raw
          post.raw = new_raw
          post.cooked = post.cook(new_raw)
          cpp = CookedPostProcessor.new(post)
          cpp.link_post_uploads
          post.custom_fields["import_post_process"] = true
          post.save
        end
      rescue PrettyText::JavaScriptError
        puts "GOT A JS error on post: #{post.id}"
      ensure
        # Runs for skipped and failed posts too, so the counter stays in step.
        print_status(current += 1, max)
      end
    end
  ensure
    SiteSetting.authorized_extensions = default_extensions
    SiteSetting.max_attachment_size_kb = default_max_att_size
  end
end
2015-07-29 16:11:55 +08:00
# Converts the HTML body of a Lithium message into the Markdown that is
# stored as a post's raw text.
#
# Steps, in order:
#   1. each <messagetemplate>...</messagetemplate> section is replaced by the
#      "content" of its zone item (see expand_message_templates);
#   2. a, img and li-image nodes are rewritten: internal links are made
#      relative and remapped through Permalink, internal images are replaced
#      by uploads created from files in UPLOAD_DIR;
#   3. li-user nodes become "@username" spans;
#   4. the document is converted with ReverseMarkdown and tidied up.
#
# raw     - HTML string; it is not modified
# user_id - handed to UploadCreator#create_for for images created here
#
# Returns a new, mutable String.
def postprocess_post_raw(raw, user_id)
  doc = Nokogiri::HTML5.fragment(expand_message_templates(raw))

  doc.css("a,img,li-image").each { |node| rewrite_link_or_image(node, user_id) }
  replace_user_mentions(doc)

  markdown = ReverseMarkdown.convert(doc.to_s)
  # lines that hold nothing but an &nbsp; entity
  markdown = markdown.gsub(/^\s*&nbsp;\s*$/, "")
  # ugly quotes
  markdown = markdown.gsub(/^>[\s\*]*$/, "")
  markdown = markdown.gsub(/:([a-z]+):/) { ":#{SMILEY_SUBS[$1] || $1}:" }
  # nbsp central: an &nbsp; between two alphanumerics becomes a plain space
  markdown.gsub(/([a-zA-Z0-9])&nbsp;([a-zA-Z0-9])/, "\\1 \\2")
end

# Returns a copy of +html+ in which every <messagetemplate> section has been
# replaced by the "content" value of its zone item (the first item when there
# are several), or by "" when no such value is present.
#
# The match is non-greedy so that several templates in one body are expanded
# one by one, and the block form of gsub inserts the content literally
# (backslash sequences in it are not treated as back-references).
def expand_message_templates(html)
  html.gsub(%r{<messagetemplate.*?</messagetemplate>}m) do |template_xml|
    item = Hash.from_xml(template_xml).dig("messagetemplate", "zone", "item")
    item = item.first if item.is_a?(Array)
    (item.is_a?(Hash) && item["content"]) || ""
  end
end

# Rewrites one a/img/li-image node in place.
#
# An li-image node with an id names its image directly. Otherwise the node's
# href (or src) is parsed; URLs on OLD_DOMAIN and URLs without a host count
# as internal. Internal links are reduced to their path and, when a Permalink
# with a target_url exists for that path, pointed at that target. Internal
# image sources of the form ".../image-id/<name>..." name an image to attach.
# Anything else (external hosts, unparsable URLs) is left untouched.
def rewrite_link_or_image(node, user_id)
  upload_name = nil

  if node.name == "li-image" && node["id"]
    upload_name = node["id"]
  else
    uri =
      begin
        URI.parse(node["href"] || node["src"])
      rescue StandardError
        nil
      end
    return unless uri

    # Host names are case-insensitive.
    uri.hostname = nil if uri.hostname&.casecmp?(OLD_DOMAIN)
    return if uri.hostname

    path = uri.path

    if node["href"]
      # URIs such as mailto: have no path at all, and bare "#fragment" links
      # have an empty one; overwriting href with that would destroy the link.
      return if path.nil? || path.empty?

      node["href"] = path
      # we have an internal link, lets see if we can remap it?
      permalink =
        begin
          Permalink.find_by_url(path)
        rescue StandardError
          nil
        end
      target = permalink&.target_url
      node["href"] = target if target
    elsif node["src"]
      # we need an upload here
      upload_name = path && path[%r{image-id/([^/]+)}, 1]
    end
  end

  attach_local_image(node, upload_name, user_id) if upload_name
end

# Looks for UPLOAD_DIR/<upload_name>.png, .jpg or .gif (in that order),
# creates an upload from the first file found and points +node+ at it,
# turning an li-image node into an img. The upload is named after the file's
# real extension. Prints a notice when no file exists.
def attach_local_image(node, upload_name, user_id)
  image =
    %w[png jpg gif]
      .map { |ext| File.join(UPLOAD_DIR, "#{upload_name}.#{ext}") }
      .find { |candidate| File.exist?(candidate) }

  unless image
    puts "image was missing #{node["src"]}"
    return
  end

  File.open(image) do |file|
    upload = UploadCreator.new(file, "image#{File.extname(image)}").create_for(user_id)
    node.name = "img" if node.name == "li-image"
    node["src"] = upload.url
  end
end

# for user mentions: replaces every li-user node whose uid matches an
# imported user (UserCustomField "import_id") with a span holding
# "@<username>". Nodes without a uid or without a matching user are kept.
def replace_user_mentions(doc)
  doc
    .css("li-user")
    .each do |node|
      uid = node["uid"]
      next if uid.blank?

      user = UserCustomField.find_by(name: "import_id", value: uid)&.user
      next if user.blank?

      span = node.document.create_element("span")
      span.inner_html = "@#{user.username}"
      node.replace(span)
    end
end
2018-05-21 16:04:52 +08:00
# Builds the HTML for a post's attachments.
#
# For each entry of +files+ (rows with "attachment_id" and "file_name"),
# find_upload is asked for an upload; entries it cannot resolve are left out.
# The html_for_upload snippet of each remaining entry is appended, with a
# newline in front of it whenever the text built so far is present.
#
# Returns a mutable String, empty when no upload could be created.
def html_for_attachments(user_id, files)
  files.each_with_object(+"") do |attachment, markup|
    upload, filename =
      find_upload(user_id, attachment["attachment_id"], attachment["file_name"])
    next if upload.blank?

    markup << "\n" if markup.present?
    markup << html_for_upload(upload, filename)
  end
end
2015-07-14 13:53:38 +08:00
# Runs +sql+ on the importer's MySQL connection (@client) with the
# cache_rows option set to true and returns whatever the client's #query
# returns.
def mysql_query(sql)
  query_options = { cache_rows: true }
  @client.query(sql, **query_options)
end
end
# Script entry point: builds the importer and calls #perform on it as soon as
# the file is loaded (#perform is not defined in this part of the file --
# presumably it comes from ImportScripts::Base).
ImportScripts :: Lithium . new . perform